Compare commits

...

141 Commits

Author SHA1 Message Date
Lioncash
9046f764bf rasterizer_cache: Remove reliance on the System singleton
Rather than have a transparent dependency, we can make it explicit in
the interface. This also gets rid of the need to put the core include in
a header.
2018-11-08 06:16:38 -05:00
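A minimal sketch of the dependency-injection pattern the commit above describes; the class and member names are illustrative, not the exact yuzu interface.

namespace Core { class System; }  // a forward declaration is now enough in the header

class RasterizerCache {
public:
    // The System dependency is passed in explicitly instead of being fetched from a
    // global singleton inside member functions, so core/core.h no longer has to be
    // included in this header and the coupling is visible at the call site.
    explicit RasterizerCache(Core::System& system) : system{system} {}

private:
    Core::System& system;
};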
Lioncash
29f082775b gl_resource_manager: Amend clang-format discrepancies
Fixes the buildbot.
2018-11-08 00:23:45 -05:00
bunnei
0e05a9d58f Merge pull request #1658 from ogniK5377/holdtype-style
Updated npad styles on holdtype switches
2018-11-07 20:59:01 -08:00
David
581406af18 svcBreak now dumps information from the debug buffer passed (#1646)
* svcBreak now dumps information from the debug buffer passed

info1 and info2 seem to sometimes hold an address to a buffer; this is usually 4 bytes (the size of an int) and contains an error code. There are other circumstances where it can be something different, so we hexdump these to examine them at a later date (sketched below).

* Addressed comments
2018-11-07 20:43:54 -08:00
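A hedged sketch of the dumping behaviour described in the svcBreak commit above; this is illustrative only, and assumes the buffer has already been read out of guest memory.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

void DumpBreakBuffer(const std::vector<std::uint8_t>& buffer) {
    if (buffer.size() == sizeof(std::uint32_t)) {
        // The common case: info1 pointed at a 4-byte buffer holding an error code.
        std::uint32_t error_code = 0;
        std::memcpy(&error_code, buffer.data(), sizeof(error_code));
        std::printf("svcBreak debug buffer error code: 0x%08X\n", error_code);
        return;
    }
    // Anything else gets hexdumped so it can be examined later.
    for (std::size_t i = 0; i < buffer.size(); ++i) {
        std::printf("%02X%c", buffer[i], (i + 1) % 16 == 0 ? '\n' : ' ');
    }
    std::printf("\n");
}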
bunnei
481d8716e0 Merge pull request #1655 from ogniK5377/shantae
Implement acc:TrySelectUserWithoutInteraction
2018-11-07 20:41:06 -08:00
David Marcec
a9c25ab9e4 Updated npad styles on holdtype switches
Fixes input for megaman
2018-11-08 01:07:14 +11:00
bunnei
81ff9e2473 Merge pull request #1630 from bunnei/fix-mapbufferex
memory_manager: Do not MapBufferEx over already in use memory.
2018-11-07 00:14:36 -08:00
bunnei
74bce4d68f Merge pull request #1635 from Tinob/master
Implement multi-target viewports and blending
2018-11-07 00:11:49 -08:00
bunnei
e5a0a23553 Merge pull request #1653 from degasus/profiler
gl_rasterizer: Update microprofile scopes.
2018-11-07 00:10:13 -08:00
David Marcec
ad45d68871 fixed spelling error 2018-11-07 12:04:43 +11:00
David Marcec
49cb4fa37b Added missing log 2018-11-07 11:46:04 +11:00
David Marcec
92fcc6d15a Implement acc:TrySelectUserWithoutInteraction
Needed for Shantae - Half-Genie Hero - Ultimate Edition!
2018-11-07 11:45:01 +11:00
Markus Wick
2c87f10267 gl_rasterizer_cache: Add profiles for Copy and Blit.
They were missed, and Copy is very high in the profile here. It doesn't block the GPU, but it stalls the driver thread, so with our inefficient GL usage this might block for quite a while.
2018-11-06 17:45:32 +01:00
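A sketch of how a microprofile scope is added around a hot path, following the macro pattern visible in the audio diff further below; the group and scope names here are illustrative, not the ones added by these commits.

#include "common/microprofile.h"

MICROPROFILE_DEFINE(OpenGL_SurfaceCopy, "OpenGL", "SurfaceCopy", MP_RGB(128, 192, 128));

void CopySurface(/* src, dst, ... */) {
    MICROPROFILE_SCOPE(OpenGL_SurfaceCopy); // time spent below shows up in the profiler
    // ... the actual copy/blit work ...
}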
Markus Wick
7e59e907ef gl_resource_manager: Profile creation and deletion. 2018-11-06 17:45:32 +01:00
Markus Wick
80e4dbdce7 gl_stream_buffer: Profile orphaning of stream buffer.
This serializes with the driver thread, so it may block for a while. If it shows up in the benchmark, we will notice when it happens too often.
2018-11-06 17:45:32 +01:00
Markus Wick
2ba4d878e5 microprofile: Drop ReleaseActiveBuffer scope.
This was created with the unfinished resampling PR in mind.
As the resampling is now on the audio thread, we don't need to care about this here any more.
2018-11-06 17:45:32 +01:00
bunnei
dd321dc85f Merge pull request #1649 from degasus/split_resource_manager
gl_resource_manager: Split implementations in .cpp file.
2018-11-06 11:43:17 -05:00
Markus Wick
54df9fe29e gl_resource_manager: Split implementations in .cpp file.
Those implementations are quite costly, so there is no need to inline them into the caller.
Resource deletion is often a performance bug, and splitting the definitions out makes it easy to set breakpoints on them.
2018-11-06 14:40:39 +01:00
bunnei
cdb19e71fe Merge pull request #1616 from FernandoS27/cube-array
Implement Cube Arrays
2018-11-05 15:28:48 -05:00
bunnei
722e7c05de Merge pull request #1633 from ogniK5377/reload-input
Fixed HID crash when launching more than 1 game & signaled styleset change event
2018-11-05 00:21:27 -05:00
bunnei
e10483a878 Merge pull request #1441 from CarlKenner/DebuggerLog
logging: Add DebuggerBackend for logging to Visual Studio
2018-11-05 00:19:59 -05:00
bunnei
acdc770cfb Merge pull request #1639 from DarkLordZach/open-yuzu-folder
qt: Add help option to open yuzu folder
2018-11-05 00:19:15 -05:00
Rodolfo Bogado
19038db489 Add support for color masks to avoid blending issues caused by wrong values in the alpha channel of some render targets. 2018-11-05 00:24:19 -03:00
Rodolfo Bogado
145ae36963 Implement multi-target viewports and blending 2018-11-04 20:49:48 -03:00
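An illustrative OpenGL sketch of the kind of per-render-target state the two commits above map Maxwell state onto (not the actual yuzu rasterizer code; blend factors are placeholders).

#include <glad/glad.h>

void SetRenderTargetState(GLuint index, float x, float y, float w, float h) {
    glViewportIndexedf(index, x, y, w, h);          // independent viewport per target
    glEnablei(GL_BLEND, index);                     // blending toggled per draw buffer
    glBlendFuncSeparatei(index, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO);
    glColorMaski(index, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); // per-target color mask
}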
bunnei
38c1c500ab Merge pull request #1625 from FernandoS27/astc
Implement ASTC Textures 5x5 and fix a bunch of ASTC texture problems
2018-11-04 18:47:26 -05:00
Mat M
d46e0acb3c Merge pull request #1645 from dharmin/master
Fix quickstart link
2018-11-04 18:05:33 -05:00
Mat M
e450b0bbac Merge pull request #1643 from FreddyFunk/typo
Fix typo in BufferTransformFlags
2018-11-04 18:04:50 -05:00
Dharmin K Shah
b2647dba33 Fix quickstart link 2018-11-05 00:36:49 +05:30
Frederic Laing
1c4365d928 Fix typo in BufferTransformFlags 2018-11-04 16:56:10 +01:00
Zach Hilman
97187b7ef6 qt: Add help option to open yuzu folder
Opens a new file manager window at the UserDir.
2018-11-03 12:48:34 -04:00
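A minimal Qt sketch of opening a folder in the host file manager, the mechanism the help option above relies on; the function name is illustrative.

#include <QDesktopServices>
#include <QString>
#include <QUrl>

void OpenUserFolder(const QString& user_dir) {
    // QDesktopServices hands the directory URL to the platform's default handler,
    // which opens a new file manager window at that location.
    QDesktopServices::openUrl(QUrl::fromLocalFile(user_dir));
}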
bunnei
6664d7b2c5 Merge pull request #1636 from ogniK5377/hwopus-bad-assert
Fixed incorrect hwopus assert
2018-11-03 00:29:15 -04:00
David Marcec
03c26d3406 Fixed incorrect hwopus assert 2018-11-02 15:23:38 +11:00
David Marcec
0bc323bafb Fixed HID crash when launching more than 1 game & signaled styleset change event
This should fix crashes when launching multiple games in yuzu
2018-11-02 12:35:49 +11:00
FernandoS27
60a184455c Fix ASTC Decompressor to support depth parameter 2018-11-01 19:22:12 -04:00
bunnei
1069eced84 Merge pull request #1615 from lioncash/input
configure_system: Constrain profile usernames to 32 characters
2018-11-01 19:10:26 -04:00
bunnei
4aa9779ae1 memory_manager: Do not MapBufferEx over already in use memory.
- This fixes rendering when changing areas in Super Mario Odyssey.
2018-11-01 18:57:59 -04:00
bunnei
cc1fe93297 Merge pull request #1623 from Tinob/master
Improve OpenGL state handling
2018-11-01 15:53:33 -04:00
FernandoS27
aee93f98f9 Fix ASTC formats 2018-11-01 13:08:19 -04:00
FernandoS27
31930a3334 Implemented ASTC 5x5 2018-11-01 13:06:24 -04:00
FernandoS27
678c18aa5c Implement Cube Arrays 2018-11-01 11:56:19 -04:00
bunnei
9afcbba8e4 Merge pull request #1527 from FernandoS27/assert-flow
Assert Control Flow Instructions using Control Codes
2018-11-01 00:34:56 -04:00
bunnei
7992dee8e9 Merge pull request #1622 from bunnei/fix-macros
maxwell_3d: Restructure macro upload to use a single macro code memory.
2018-11-01 00:14:59 -04:00
bunnei
de0ab806df maxwell_3d: Restructure macro upload to use a single macro code memory.
- Fixes an issue where macros could be skipped.
- Fixes rendering of distant objects in Super Mario Odyssey.
2018-10-31 23:29:21 -04:00
bunnei
d08457f879 Merge pull request #1604 from FearlessTobi/port-4369
Port citra-emu/citra#4369: "compatdb: Use a separate endpoint for testcase submission"
2018-10-31 22:37:07 -04:00
bunnei
86e70cf302 Merge pull request #1528 from FernandoS27/assert-control-codes
Assert Control Codes Generation on Shader Instructions
2018-10-31 22:34:18 -04:00
bunnei
0b33d38e9b Merge pull request #1614 from ReinUsesLisp/surface-params
video_core: Move surface declarations out of gl_rasterizer_cache
2018-10-31 22:31:02 -04:00
bunnei
e7fc3d13ed Merge pull request #1626 from lioncash/table
service/usb: Update IPdSession's function table
2018-10-31 22:28:58 -04:00
Lioncash
a6830e61b8 configure_system: Constrain profile usernames to 32 characters
Previously, we would let a user enter an unbounded name and then silently truncate away any characters past the 32-character limit. This is poor UX, because we end up not doing what the user intended in certain scenarios.

Instead, we clamp the input to 32 characters and make that limit visually apparent in the dialog box used to provide a name for a user (see the Qt sketch below).
2018-10-31 02:05:00 -04:00
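A minimal Qt sketch of the approach described above (illustrative widget code, not the actual yuzu dialog): the line edit enforces the 32-character limit as the user types instead of truncating silently afterwards.

#include <QDialog>
#include <QLineEdit>
#include <QVBoxLayout>

class UsernameDialog : public QDialog {
public:
    explicit UsernameDialog(QWidget* parent = nullptr) : QDialog(parent) {
        auto* layout = new QVBoxLayout(this);
        line_edit = new QLineEdit(this);
        line_edit->setMaxLength(32); // Qt refuses further input once the limit is reached
        layout->addWidget(line_edit);
    }

    QString GetUsername() const {
        return line_edit->text();
    }

private:
    QLineEdit* line_edit = nullptr;
};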
Rodolfo Bogado
aca218aea0 Improve OpenGL state handling 2018-10-30 21:19:04 -03:00
Lioncash
9b9c586dff service/usb: Update IPdSession's function table
Updated based off information on SwitchBrew.
2018-10-30 15:23:49 -04:00
ReinUsesLisp
76754f5705 video_core: Move surface declarations out of gl_rasterizer_cache 2018-10-30 16:07:20 -03:00
FernandoS27
5bb80ab009 Assert Control Codes Generation 2018-10-30 13:37:55 -04:00
bunnei
da5fcbf501 Merge pull request #1624 from lioncash/boost
general: Remove unused boost inclusions where applicable
2018-10-30 13:13:39 -04:00
bunnei
c31412c433 Merge pull request #1595 from FreddyFunk/cast
configure_system: Fix compiler warning
2018-10-30 00:27:41 -04:00
Lioncash
352b56367c general: Remove unused boost inclusions where applicable
Cleans up unused includes and trims off some dependencies on externals.
2018-10-30 00:09:46 -04:00
Frederic L
7a5eda5914 global: Use std::optional instead of boost::optional (#1578)
* get rid of boost::optional

* Remove optional references

* Use std::reference_wrapper for optional references

* Fix clang format

* Fix clang format part 2

* Adressed feedback

* Fix clang format and MacOS build
2018-10-30 00:03:25 -04:00
bunnei
adf26ae668 Merge pull request #1621 from lioncash/ipc
hle_ipc: Make GetDomainMessageHeader return a regular pointer
2018-10-29 23:55:59 -04:00
Lioncash
6383653a8d hle_ipc: Add member function for querying the existence of a domain header
Gets rid of the need to call the getter and then check for null.
2018-10-29 23:28:04 -04:00
Lioncash
0cc347462d hle_ipc: Make GetDomainMessageHeader return a regular pointer
Nothing requires the shared owner ship here, so we can just return a
plain pointer.
2018-10-29 23:18:25 -04:00
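A hedged sketch of the two hle_ipc changes above; member and type names mirror the commit messages, and the real yuzu signatures may differ slightly.

#include <memory>

struct DomainMessageHeader {};

class HLERequestContext {
public:
    // Returns a non-owning pointer; nothing needs shared ownership of the header.
    const DomainMessageHeader* GetDomainMessageHeader() const {
        return domain_message_header.get();
    }

    // Lets callers ask whether a domain header exists without fetching the pointer
    // just to null-check it.
    bool HasDomainMessageHeader() const {
        return domain_message_header != nullptr;
    }

private:
    std::shared_ptr<DomainMessageHeader> domain_message_header;
};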
bunnei
938e45eb83 Merge pull request #1611 from lioncash/const
core: Add missing const variants of getters for the System class
2018-10-29 22:48:59 -04:00
bunnei
c5a849212f Merge pull request #1580 from FernandoS27/mm-impl
Implemented Mipmaps
2018-10-29 22:34:00 -04:00
bunnei
f1cb425d92 Merge pull request #1617 from FearlessTobi/fix-stretch-delay
time_stretch: Switch to values of Citra
2018-10-29 19:09:30 -04:00
bunnei
0270906dbf Merge pull request #1613 from ReinUsesLisp/gl-utils
video_core: Move OpenGL specific utils to its renderer
2018-10-29 13:22:14 -04:00
fearlessTobi
655694253a time_stretch: Switch to values of Citra 2018-10-29 14:49:36 +01:00
bunnei
5d7167dfca Merge pull request #1610 from slashiee/dxt1-alpha
renderer_opengl: Enable alpha channel for DXT1 texture format
2018-10-28 21:29:43 -04:00
bunnei
9049aedd83 Merge pull request #1612 from Tinob/master
renderer_opengl: Correct bpp value for ASTC_2D_8X5_SRGB
2018-10-28 21:28:34 -04:00
ReinUsesLisp
80cbd81276 video_core: Move OpenGL specific utils to its renderer 2018-10-28 22:22:30 -03:00
Rodolfo Bogado
e8b565b239 renderer_opengl: Correct bpp value for ASTC_2D_8X5_SRGB 2018-10-28 20:52:57 -03:00
FernandoS27
3aa8b644a9 Assert Control Flow Instructions using Control Codes 2018-10-28 19:16:41 -04:00
FernandoS27
dde3094058 Fixed black textures, pixelation and we no longer require to auto-generate mipmaps 2018-10-28 19:00:49 -04:00
FernandoS27
f0e902a7d6 Fixed mipmap block autosizing algorithm 2018-10-28 19:00:05 -04:00
FernandoS27
87f8181405 Fixed Invalid Image size and Mipmap calculation 2018-10-28 19:00:04 -04:00
FernandoS27
f4432b5d0c Fixed Block Resizing algorithm and Clang Format 2018-10-28 19:00:03 -04:00
FernandoS27
258f0f5c31 Implement Mip Filter 2018-10-28 19:00:01 -04:00
FernandoS27
dc85e3bff1 Zero out memory region of recreated surface before flushing 2018-10-28 19:00:00 -04:00
FernandoS27
bbf3b2da0c Implement Mipmaps 2018-10-28 18:59:59 -04:00
Lioncash
a973a049b7 core: Make System references const where applicable 2018-10-28 17:45:29 -04:00
Lioncash
b77f571d20 core: Add missing const variants of getters for the System class
Many of the Current<Thing> getters (as well as a few others) were
missing const qualified variants, which makes it a pain to retrieve
certain things from const qualified references to System.
2018-10-28 17:44:58 -04:00
Michael
635d1e5651 Enable alpha channel for DXT1 texture format 2018-10-28 14:11:04 -07:00
bunnei
b5f8a5f0a3 Merge pull request #1607 from FearlessTobi/patch-3
renderer_opengl: Correct bpp value for ASTC_2D_8X5
2018-10-28 16:59:13 -04:00
Frederic Laing
f50f065c31 configure_system: Fix compiler warning 2018-10-28 21:13:09 +01:00
Tobias
351d5a2227 Correct bpp value for ASTC_2D_8X5 2018-10-28 19:49:10 +01:00
bunnei
aa1cf608ed Merge pull request #1601 from FernandoS27/shader-precision
Improved Shader accuracy on Vertex and Geometry Shaders.
2018-10-28 13:06:21 -04:00
bunnei
4ddbd9bbaf Merge pull request #1606 from FearlessTobi/revert-1581-macosx-target-version
Revert "Update MACOSX_DEPLOYMENT_TARGET to 10.14"
2018-10-28 12:11:00 -04:00
FernandoS27
e5ca097e32 Refactor precise usage and add FMNMX, MUFU, FMUL32 and FADD332 2018-10-28 11:38:40 -04:00
Tobias
03150a560e Revert "Update MACOSX_DEPLOYMENT_TARGET to 10.14" 2018-10-28 13:43:42 +01:00
fearlessTobi
585b6a6a50 compatdb: Use a separate endpoint for testcase submission 2018-10-28 13:23:02 +01:00
bunnei
2239d47112 Merge pull request #1593 from lioncash/svc
svc: Implement svcGetInfo command 0xF0000002
2018-10-28 04:38:48 -04:00
bunnei
b32be35173 Merge pull request #1581 from FreddyFunk/macosx-target-version
Update MACOSX_DEPLOYMENT_TARGET to 10.14
2018-10-28 04:32:11 -04:00
Frederic L
72d10ce66c file_sys/patch_manager: Remove unnecessary if-statements (#1586)
* remove unnecessary if-statements

* Addressed feedback
2018-10-28 02:30:29 -04:00
bunnei
0d449b77e2 Merge pull request #1598 from DeeJayBro/delete-directory
service/filesystem: Implemented DeleteDirectory & DeleteDirectoryRecursive
2018-10-28 02:29:52 -04:00
bunnei
6f620b2441 Merge pull request #1600 from DarkLordZach/nsp-secondary-loader-fix
loader/nsp: Move secondary loader initialization to constructor
2018-10-28 02:29:14 -04:00
bunnei
1fca683388 Merge pull request #1582 from Tinob/master
Implement sRGB support
2018-10-28 00:24:36 -04:00
Rodolfo Bogado
0287b2be6d Implement sRGB Support, including workarounds for nvidia driver issues and QT sRGB support 2018-10-28 01:13:55 -03:00
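An illustrative OpenGL-side sketch of what sRGB support involves, assuming a glad loader; this is not the yuzu implementation, just the two GL pieces the commit above revolves around.

#include <glad/glad.h>

GLuint CreateSrgbTexture(GLsizei width, GLsizei height) {
    GLuint tex = 0;
    glGenTextures(1, &tex);
    glBindTexture(GL_TEXTURE_2D, tex);
    // GL_SRGB8_ALPHA8 makes the driver linearize texel reads in the shader.
    glTexStorage2D(GL_TEXTURE_2D, 1, GL_SRGB8_ALPHA8, width, height);
    return tex;
}

void EnableSrgbWrites() {
    // Conversion back to sRGB happens on writes to sRGB-capable framebuffers.
    glEnable(GL_FRAMEBUFFER_SRGB);
}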
bunnei
a9d60c6103 Merge pull request #1602 from DarkLordZach/key-derivation-isxdigit
key_manager: Use isxdigit instead of isdigit when reading key file
2018-10-27 23:54:48 -04:00
Zach Hilman
1fa31cf74d key_manager: Use isxdigit instead of isdigit when reading key file
Crypto revisions are hex numbers and this function only checks if the string is valid for stoul in base 16, so it should be isxdigit.
2018-10-27 21:56:10 -04:00
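A small sketch of the validation the key_manager commit above describes: crypto revisions are base-16 numbers, so every character must be a hex digit before the string is handed to std::stoul(str, nullptr, 16). The function name is illustrative.

#include <algorithm>
#include <cctype>
#include <string>

bool IsValidHexString(const std::string& str) {
    return !str.empty() && std::all_of(str.begin(), str.end(), [](unsigned char c) {
        return std::isxdigit(c) != 0; // std::isdigit would wrongly reject 'a'-'f'
    });
}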
bunnei
8d89e88d2c Merge pull request #1597 from lioncash/error
configure_system: Indicate when filesystem operations fail
2018-10-27 21:09:31 -04:00
bunnei
d63f5acb15 Merge pull request #1594 from FreddyFunk/static-cast
gl_rasterizer_cache: Fix compiler warning
2018-10-27 21:09:06 -04:00
FernandoS27
d8d557df86 Improved Shader accuracy on Vertex and Geometry Shaders with FFMA, FMUL and FADD 2018-10-27 20:09:26 -04:00
bunnei
a9dc34ea5c Merge pull request #1596 from FearlessTobi/port-4367
Port citra-emu/citra#4367: "cubeb_sink: ignore null-name device when selecting"
2018-10-27 13:26:24 -04:00
bunnei
ed95ce6bb7 Merge pull request #1592 from bunnei/prim-restart
gl_rasterizer: Implement primitive restart.
2018-10-27 13:25:00 -04:00
bunnei
ac8231ed10 Merge pull request #1599 from FernandoS27/stalemate
Implement Default Block Height for each format
2018-10-27 12:05:14 -04:00
FernandoS27
705300992e Implement Default Block Height for each format 2018-10-27 10:17:39 -04:00
Zach Hilman
7c70746ec4 loader/nsp: Move secondary loader initialization to constructor
Prevents a nullptr bug when trying to dump the RomFS of an NSP, caused by secondary_loader not being initialized.
2018-10-27 10:16:29 -04:00
Frederic Laing
0bf24d310e gl_rasterizer_cache: Fix compiler warning 2018-10-27 13:06:26 +02:00
DeeJayBro
3b1e4c0995 service/filesystem: Add DirectoryDelete & DirectoryDeleteRecursively 2018-10-27 11:56:39 +02:00
Lioncash
9024cbb5b8 configure_system: Make GetIcon() return the scaled 64x64 icon
Avoids the need to put the scaling parameters all over the place for the
common case. The only other time scaling is done is to generate the
smaller 48x48 image, so this is fine.
2018-10-27 01:05:56 -04:00
Lioncash
85ed0af84e configure_system: Move entry formatting for the user account list entries to its own function
Avoids the need to duplicate this all over the place, and makes it
translator-friendly across the board.
2018-10-27 01:05:56 -04:00
Lioncash
8eaf857d06 configure_system: Display errors to the user if file operations fail when setting user images
We should display an error to the user if setting a user image for an
account fails, rather than continuing onwards.
2018-10-27 01:05:50 -04:00
Weiyi Wang
12c365b549 cubeb_sink: ignore null-name device when selecting
We already ignore them when listing devices. We should do the same when selecting devices. This fixes a crash when opening a specific device while there is a null device in the list.
2018-10-27 00:43:04 +02:00
Lioncash
7de8e36343 svc: Localize the GetInfo enum class to the function itself
Nothing from this enum is intended to be used outside of this function.
2018-10-26 12:49:14 -04:00
Lioncash
6594853eb1 svc: Implement svcGetInfo command 0xF0000002
This retrieves:

if (curr_thread == handle_thread) {
   result = total_thread_ticks + (hardware_tick_count - last_context_switch_ticks);
} else if (curr_thread == handle_thread && sub_id == current_core_index) {
   result = hardware_tick_count - last_context_switch_ticks;
}
2018-10-26 12:49:11 -04:00
bunnei
58444a0376 gl_rasterizer: Implement primitive restart. 2018-10-26 00:42:57 -04:00
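A minimal OpenGL sketch of primitive restart (illustrative; the actual change drives this from Maxwell register state): when the restart index appears in the index buffer, the current strip or fan is cut and a new one begins.

#include <glad/glad.h>

void SetPrimitiveRestart(bool enabled, GLuint restart_index) {
    if (enabled) {
        glEnable(GL_PRIMITIVE_RESTART);
        glPrimitiveRestartIndex(restart_index); // e.g. 0xFFFF for 16-bit indices
    } else {
        glDisable(GL_PRIMITIVE_RESTART);
    }
}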
bunnei
d278f25bda Merge pull request #1533 from FernandoS27/lmem
Implemented Shader Local Memory
2018-10-26 00:16:25 -04:00
bunnei
72e6b31a07 Merge pull request #1430 from DarkLordZach/remove-promote-dir
vfs: Remove InterpretAsDirectory and related functions
2018-10-26 00:15:34 -04:00
bunnei
1f98dc30ea Merge pull request #1591 from bunnei/depth-range
gl_rasterizer: Implement depth range.
2018-10-26 00:12:20 -04:00
bunnei
949d9a7136 maxwell_3d: Add code for initializing register defaults. 2018-10-25 23:42:39 -04:00
bunnei
debabf1fa6 Merge pull request #1569 from lioncash/amiibo
yuzu/main: Notify user of loading errors with Amiibo data
2018-10-25 22:10:08 -04:00
bunnei
476b9f8fc5 Merge pull request #1587 from lioncash/private
configure_system: Minor cleanup-related changes
2018-10-25 22:08:06 -04:00
bunnei
a94831f2a9 Merge pull request #1557 from bunnei/ldr_ro
Implement the LoadNro functions from the ldr:ro service.
2018-10-25 21:54:35 -04:00
bunnei
8cea598158 gl_rasterizer: Implement depth range. 2018-10-25 21:53:24 -04:00
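A minimal OpenGL sketch of what a depth-range implementation boils down to; illustrative only, as the actual change maps Maxwell viewport transform state onto GL.

#include <glad/glad.h>

void SetDepthRange(double z_near, double z_far) {
    // Maps NDC depth [-1, 1] into the [z_near, z_far] window-space range.
    glDepthRange(z_near, z_far);
}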
bunnei
cebce2a93a ldr: Partially implement LoadNro.
- This is an incomplete implementation. It was tested with Super Mario Party.
2018-10-25 18:03:54 -04:00
bunnei
c2049aa4e5 process: LoadModule should clear JIT instruction cache. 2018-10-25 18:03:54 -04:00
bunnei
a609b6907a Kernel/Memory: Added a function to find a suitable guest address at which to allocate a region of a given size. 2018-10-25 18:03:54 -04:00
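A hedged sketch of the helper described in the commit above; the names and the free-region query are assumptions, not the actual kernel memory-manager API. It walks the candidate range in page-sized steps and returns the first address with enough free space.

#include <cstdint>
#include <functional>
#include <optional>

using VAddr = std::uint64_t;

std::optional<VAddr> FindFreeRegion(
    VAddr begin, VAddr end, std::uint64_t size,
    const std::function<bool(VAddr, std::uint64_t)>& is_region_free,
    std::uint64_t page_size = 0x1000) {
    for (VAddr addr = begin; addr + size <= end; addr += page_size) {
        if (is_region_free(addr, size)) {
            return addr; // first address where [addr, addr + size) is unmapped
        }
    }
    return std::nullopt;
}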
bunnei
ef7b2237d9 nro: Make LoadNro method accessible outside of apploader code. 2018-10-25 18:03:54 -04:00
Mat M
a141b46b5c Merge pull request #1583 from DarkLordZach/rle-size
ips_layer: Use rle_size instead of data_size in RLE patch application
2018-10-25 17:54:38 -04:00
Lioncash
85285b09b0 configure_system: Make the file selector text translatable
This should be localizable, since it's user-facing text.
2018-10-25 17:27:30 -04:00
Lioncash
5172354e29 configure_system: Make GetAccountUsername() an internal function
We can just make the function accept an arbitrary ProfileManager
reference and operate on that instead of tying the function to the class
itself. This allows us to keep the function internal to the cpp file and
removes the need to forward declare the UUID struct.
2018-10-25 17:27:25 -04:00
Lioncash
bf7da804c5 configure_system: Default initialize member variables
These should be initialized to deterministic values so it's easier to catch improper behavior, as it will always be reproducible instead of depending on uninitialized reads.
2018-10-25 16:52:23 -04:00
Lioncash
8806e69f59 configure_system: Simplify UUID generation call in AddUser()
This is a static function so we can just perform an assignment directly.
2018-10-25 16:50:07 -04:00
Lioncash
a6addb5332 configure_system: Amend function casing 2018-10-25 16:47:09 -04:00
Lioncash
2347e1b8c5 configure_system: Add missing override specifier on the destructor 2018-10-25 16:45:13 -04:00
Lioncash
3c63cecb96 configure_system: Make public slots private
These are only used within this class, so we can make them private to
keep their use contained. This also gets rid of the pre-Qt5 'slot'
identifier, since Qt 5's connection syntax doesn't require a function to
be declared a slot anymore.
2018-10-25 16:43:44 -04:00
James Rowe
e54c9e19f3 Merge pull request #1584 from FearlessTobi/patch-3
Delete git file
2018-10-25 13:55:10 -06:00
Zach Hilman
9a87ece837 ips_layer: Use rle_size instead of data_size in RLE patch application
Prevents a potential bug when using RLE records in an IPS patch.
2018-10-25 14:23:56 -04:00
Frederic L
942def7831 Update MACOSX_DEPLOYMENT_TARGET to 10.14 2018-10-25 08:35:42 +02:00
FernandoS27
ca142f35c0 Implemented LD_L and ST_L 2018-10-24 17:51:53 -04:00
FernandoS27
abefe29398 Implement Shader Local Memory 2018-10-24 17:50:43 -04:00
Lioncash
bed2d6c425 yuzu/main: Notify user of loading errors with Amiibo data
We shouldn't silently continue if loading failed, since the general
assumption is that no messages showing up implicitly indicates success.
2018-10-24 10:39:31 -04:00
Zach Hilman
a25d79cfaa vfs: Remove InterpretAsDirectory and related functions
When writing the VFS, it initially seemed useful to include a function to convert container files into directories in place, as part of one homogeneous directory structure. Re-evaluating it now, there have been plenty of chances to use it and there has always been a better way. Removing it, as it is unused and likely will not be used.
2018-10-19 14:02:07 -04:00
Carl Kenner
f5f6292810 logging: Add DebuggerBackend for logging to Visual Studio 2018-10-07 13:24:04 +10:30
141 changed files with 3730 additions and 1984 deletions

View File

@@ -121,7 +121,8 @@ CubebSink::CubebSink(std::string target_device_name) {
const auto collection_end{collection.device + collection.count};
const auto device{
std::find_if(collection.device, collection_end, [&](const cubeb_device_info& info) {
return target_device_name == info.friendly_name;
return info.friendly_name != nullptr &&
target_device_name == info.friendly_name;
})};
if (device != collection_end) {
output_device = device->devid;

View File

@@ -11,7 +11,6 @@
#include "audio_core/stream.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/settings.h"
@@ -104,10 +103,7 @@ void Stream::PlayNextBuffer() {
CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
}
MICROPROFILE_DEFINE(AudioOutput, "Audio", "ReleaseActiveBuffer", MP_RGB(100, 100, 255));
void Stream::ReleaseActiveBuffer() {
MICROPROFILE_SCOPE(AudioOutput);
ASSERT(active_buffer);
released_buffers.push(std::move(active_buffer));
release_callback();

View File

@@ -32,10 +32,10 @@ std::size_t TimeStretcher::Process(const s16* in, std::size_t num_in, s16* out,
// We were given actual_samples number of samples, and num_samples were requested from us.
double current_ratio = static_cast<double>(num_in) / static_cast<double>(num_out);
const double max_latency = 1.0; // seconds
const double max_latency = 0.25; // seconds
const double max_backlog = m_sample_rate * max_latency;
const double backlog_fullness = m_sound_touch.numSamples() / max_backlog;
if (backlog_fullness > 5.0) {
if (backlog_fullness > 4.0) {
// Too many samples in backlog: Don't push anymore on
num_in = 0;
}
@@ -49,7 +49,7 @@ std::size_t TimeStretcher::Process(const s16* in, std::size_t num_in, s16* out,
// This low-pass filter smoothes out variance in the calculated stretch ratio.
// The time-scale determines how responsive this filter is.
constexpr double lpf_time_scale = 2.0; // seconds
constexpr double lpf_time_scale = 0.712; // seconds
const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale);
m_stretch_ratio += lpf_gain * (current_ratio - m_stretch_ratio);

View File

@@ -12,7 +12,8 @@
#include <thread>
#include <vector>
#ifdef _WIN32
#include <share.h> // For _SH_DENYWR
#include <share.h> // For _SH_DENYWR
#include <windows.h> // For OutputDebugStringA
#else
#define _SH_DENYWR 0
#endif
@@ -139,12 +140,18 @@ void FileBackend::Write(const Entry& entry) {
if (!file.IsOpen() || bytes_written > MAX_BYTES_WRITTEN) {
return;
}
bytes_written += file.WriteString(FormatLogMessage(entry) + '\n');
bytes_written += file.WriteString(FormatLogMessage(entry).append(1, '\n'));
if (entry.log_level >= Level::Error) {
file.Flush();
}
}
void DebuggerBackend::Write(const Entry& entry) {
#ifdef _WIN32
::OutputDebugStringA(FormatLogMessage(entry).append(1, '\n').c_str());
#endif
}
/// Macro listing all log classes. Code should define CLS and SUB as desired before invoking this.
#define ALL_LOG_CLASSES() \
CLS(Log) \

View File

@@ -103,6 +103,20 @@ private:
std::size_t bytes_written;
};
/**
* Backend that writes to Visual Studio's output window
*/
class DebuggerBackend : public Backend {
public:
static const char* Name() {
return "debugger";
}
const char* GetName() const override {
return Name();
}
void Write(const Entry& entry) override;
};
void AddBackend(std::unique_ptr<Backend> backend);
void RemoveBackend(std::string_view backend_name);

View File

@@ -153,6 +153,7 @@ struct VisitorInterface : NonCopyable {
/// Completion method, called once all fields have been visited
virtual void Complete() = 0;
virtual bool SubmitTestcase() = 0;
};
/**
@@ -178,6 +179,9 @@ struct NullVisitor : public VisitorInterface {
void Visit(const Field<std::chrono::microseconds>& /*field*/) override {}
void Complete() override {}
bool SubmitTestcase() override {
return false;
}
};
/// Appends build-specific information to the given FieldCollection,

View File

@@ -185,7 +185,7 @@ struct System::Impl {
LOG_CRITICAL(Core, "Failed to obtain loader for {}!", filepath);
return ResultStatus::ErrorGetLoader;
}
std::pair<boost::optional<u32>, Loader::ResultStatus> system_mode =
std::pair<std::optional<u32>, Loader::ResultStatus> system_mode =
app_loader->LoadKernelSystemMode();
if (system_mode.second != Loader::ResultStatus::Success) {
@@ -312,6 +312,10 @@ Cpu& System::CurrentCpuCore() {
return impl->CurrentCpuCore();
}
const Cpu& System::CurrentCpuCore() const {
return impl->CurrentCpuCore();
}
System::ResultStatus System::RunLoop(bool tight_loop) {
return impl->RunLoop(tight_loop);
}
@@ -342,7 +346,11 @@ PerfStatsResults System::GetAndResetPerfStats() {
return impl->GetAndResetPerfStats();
}
Core::TelemetrySession& System::TelemetrySession() const {
TelemetrySession& System::TelemetrySession() {
return *impl->telemetry_session;
}
const TelemetrySession& System::TelemetrySession() const {
return *impl->telemetry_session;
}
@@ -350,7 +358,11 @@ ARM_Interface& System::CurrentArmInterface() {
return CurrentCpuCore().ArmInterface();
}
std::size_t System::CurrentCoreIndex() {
const ARM_Interface& System::CurrentArmInterface() const {
return CurrentCpuCore().ArmInterface();
}
std::size_t System::CurrentCoreIndex() const {
return CurrentCpuCore().CoreIndex();
}
@@ -358,6 +370,10 @@ Kernel::Scheduler& System::CurrentScheduler() {
return CurrentCpuCore().Scheduler();
}
const Kernel::Scheduler& System::CurrentScheduler() const {
return CurrentCpuCore().Scheduler();
}
Kernel::Scheduler& System::Scheduler(std::size_t core_index) {
return CpuCore(core_index).Scheduler();
}
@@ -378,6 +394,10 @@ ARM_Interface& System::ArmInterface(std::size_t core_index) {
return CpuCore(core_index).ArmInterface();
}
const ARM_Interface& System::ArmInterface(std::size_t core_index) const {
return CpuCore(core_index).ArmInterface();
}
Cpu& System::CpuCore(std::size_t core_index) {
ASSERT(core_index < NUM_CPU_CORES);
return *impl->cpu_cores[core_index];
@@ -392,6 +412,10 @@ ExclusiveMonitor& System::Monitor() {
return *impl->cpu_exclusive_monitor;
}
const ExclusiveMonitor& System::Monitor() const {
return *impl->cpu_exclusive_monitor;
}
Tegra::GPU& System::GPU() {
return *impl->gpu_core;
}

View File

@@ -129,11 +129,11 @@ public:
*/
bool IsPoweredOn() const;
/**
* Returns a reference to the telemetry session for this emulation session.
* @returns Reference to the telemetry session.
*/
Core::TelemetrySession& TelemetrySession() const;
/// Gets a reference to the telemetry session for this emulation session.
Core::TelemetrySession& TelemetrySession();
/// Gets a reference to the telemetry session for this emulation session.
const Core::TelemetrySession& TelemetrySession() const;
/// Prepare the core emulation for a reschedule
void PrepareReschedule();
@@ -144,24 +144,36 @@ public:
/// Gets an ARM interface to the CPU core that is currently running
ARM_Interface& CurrentArmInterface();
/// Gets an ARM interface to the CPU core that is currently running
const ARM_Interface& CurrentArmInterface() const;
/// Gets the index of the currently running CPU core
std::size_t CurrentCoreIndex();
std::size_t CurrentCoreIndex() const;
/// Gets the scheduler for the CPU core that is currently running
Kernel::Scheduler& CurrentScheduler();
/// Gets an ARM interface to the CPU core with the specified index
/// Gets the scheduler for the CPU core that is currently running
const Kernel::Scheduler& CurrentScheduler() const;
/// Gets a reference to an ARM interface for the CPU core with the specified index
ARM_Interface& ArmInterface(std::size_t core_index);
/// Gets a const reference to an ARM interface from the CPU core with the specified index
const ARM_Interface& ArmInterface(std::size_t core_index) const;
/// Gets a CPU interface to the CPU core with the specified index
Cpu& CpuCore(std::size_t core_index);
/// Gets a CPU interface to the CPU core with the specified index
const Cpu& CpuCore(std::size_t core_index) const;
/// Gets the exclusive monitor
/// Gets a reference to the exclusive monitor
ExclusiveMonitor& Monitor();
/// Gets a constant reference to the exclusive monitor
const ExclusiveMonitor& Monitor() const;
/// Gets a mutable reference to the GPU interface
Tegra::GPU& GPU();
@@ -230,6 +242,9 @@ private:
/// Returns the currently running CPU core
Cpu& CurrentCpuCore();
/// Returns the currently running CPU core
const Cpu& CurrentCpuCore() const;
/**
* Initialize the emulated system.
* @param emu_window Reference to the host-system window used for video output and keyboard

View File

@@ -141,28 +141,28 @@ Key128 DeriveKeyblobMACKey(const Key128& keyblob_key, const Key128& mac_source)
return mac_key;
}
boost::optional<Key128> DeriveSDSeed() {
std::optional<Key128> DeriveSDSeed() {
const FileUtil::IOFile save_43(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) +
"/system/save/8000000000000043",
"rb+");
if (!save_43.IsOpen())
return boost::none;
return {};
const FileUtil::IOFile sd_private(
FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir) + "/Nintendo/Contents/private", "rb+");
if (!sd_private.IsOpen())
return boost::none;
return {};
std::array<u8, 0x10> private_seed{};
if (sd_private.ReadBytes(private_seed.data(), private_seed.size()) != private_seed.size()) {
return boost::none;
return {};
}
std::array<u8, 0x10> buffer{};
std::size_t offset = 0;
for (; offset + 0x10 < save_43.GetSize(); ++offset) {
if (!save_43.Seek(offset, SEEK_SET)) {
return boost::none;
return {};
}
save_43.ReadBytes(buffer.data(), buffer.size());
@@ -172,12 +172,12 @@ boost::optional<Key128> DeriveSDSeed() {
}
if (!save_43.Seek(offset + 0x10, SEEK_SET)) {
return boost::none;
return {};
}
Key128 seed{};
if (save_43.ReadBytes(seed.data(), seed.size()) != seed.size()) {
return boost::none;
return {};
}
return seed;
}
@@ -291,26 +291,26 @@ static std::array<u8, target_size> MGF1(const std::array<u8, in_size>& seed) {
}
template <size_t size>
static boost::optional<u64> FindTicketOffset(const std::array<u8, size>& data) {
static std::optional<u64> FindTicketOffset(const std::array<u8, size>& data) {
u64 offset = 0;
for (size_t i = 0x20; i < data.size() - 0x10; ++i) {
if (data[i] == 0x1) {
offset = i + 1;
break;
} else if (data[i] != 0x0) {
return boost::none;
return {};
}
}
return offset;
}
boost::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket,
const RSAKeyPair<2048>& key) {
std::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket,
const RSAKeyPair<2048>& key) {
u32 cert_authority;
std::memcpy(&cert_authority, ticket.data() + 0x140, sizeof(cert_authority));
if (cert_authority == 0)
return boost::none;
return {};
if (cert_authority != Common::MakeMagic('R', 'o', 'o', 't')) {
LOG_INFO(Crypto,
"Attempting to parse ticket with non-standard certificate authority {:08X}.",
@@ -321,7 +321,7 @@ boost::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket,
std::memcpy(rights_id.data(), ticket.data() + 0x2A0, sizeof(Key128));
if (rights_id == Key128{})
return boost::none;
return {};
Key128 key_temp{};
@@ -356,17 +356,17 @@ boost::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket,
std::memcpy(m_2.data(), rsa_step.data() + 0x21, m_2.size());
if (m_0 != 0)
return boost::none;
return {};
m_1 = m_1 ^ MGF1<0x20>(m_2);
m_2 = m_2 ^ MGF1<0xDF>(m_1);
const auto offset = FindTicketOffset(m_2);
if (offset == boost::none)
return boost::none;
ASSERT(offset.get() > 0);
if (!offset)
return {};
ASSERT(*offset > 0);
std::memcpy(key_temp.data(), m_2.data() + offset.get(), key_temp.size());
std::memcpy(key_temp.data(), m_2.data() + *offset, key_temp.size());
return std::make_pair(rights_id, key_temp);
}
@@ -395,7 +395,7 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
if (base.size() < begin + length)
return false;
return std::all_of(base.begin() + begin, base.begin() + begin + length,
[](u8 c) { return std::isdigit(c); });
[](u8 c) { return std::isxdigit(c); });
}
void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
@@ -661,8 +661,8 @@ void KeyManager::DeriveSDSeedLazy() {
return;
const auto res = DeriveSDSeed();
if (res != boost::none)
SetKey(S128KeyType::SDSeed, res.get());
if (res)
SetKey(S128KeyType::SDSeed, *res);
}
static Key128 CalculateCMAC(const u8* source, size_t size, const Key128& key) {
@@ -889,9 +889,9 @@ void KeyManager::DeriveETicket(PartitionDataManager& data) {
for (const auto& raw : res) {
const auto pair = ParseTicket(raw, rsa_key);
if (pair == boost::none)
if (!pair)
continue;
const auto& [rid, key] = pair.value();
const auto& [rid, key] = *pair;
u128 rights_id;
std::memcpy(rights_id.data(), rid.data(), rid.size());
SetKey(S128KeyType::Titlekey, key, rights_id[1], rights_id[0]);

View File

@@ -6,9 +6,10 @@
#include <array>
#include <map>
#include <optional>
#include <string>
#include <boost/container/flat_map.hpp>
#include <boost/optional.hpp>
#include <fmt/format.h>
#include "common/common_types.h"
#include "core/crypto/partition_data_manager.h"
@@ -191,14 +192,14 @@ Key128 DeriveMasterKey(const std::array<u8, 0x90>& keyblob, const Key128& master
std::array<u8, 0x90> DecryptKeyblob(const std::array<u8, 0xB0>& encrypted_keyblob,
const Key128& key);
boost::optional<Key128> DeriveSDSeed();
std::optional<Key128> DeriveSDSeed();
Loader::ResultStatus DeriveSDKeys(std::array<Key256, 2>& sd_keys, KeyManager& keys);
std::vector<TicketRaw> GetTicketblob(const FileUtil::IOFile& ticket_save);
// Returns a pair of {rights_id, titlekey}. Fails if the ticket has no certificate authority (offset
// 0x140-0x144 is zero)
boost::optional<std::pair<Key128, Key128>> ParseTicket(
const TicketRaw& ticket, const RSAKeyPair<2048>& eticket_extended_key);
std::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket,
const RSAKeyPair<2048>& eticket_extended_key);
} // namespace Core::Crypto

View File

@@ -168,10 +168,6 @@ VirtualDir XCI::GetParentDirectory() const {
return file->GetContainingDirectory();
}
bool XCI::ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) {
return false;
}
Loader::ResultStatus XCI::AddNCAFromPartition(XCIPartition part) {
if (partitions[static_cast<std::size_t>(part)] == nullptr) {
return Loader::ResultStatus::ErrorXCIMissingPartition;

View File

@@ -94,9 +94,6 @@ public:
VirtualDir GetParentDirectory() const override;
protected:
bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;
private:
Loader::ResultStatus AddNCAFromPartition(XCIPartition part);

View File

@@ -4,10 +4,9 @@
#include <algorithm>
#include <cstring>
#include <optional>
#include <utility>
#include <boost/optional.hpp>
#include "common/logging/log.h"
#include "core/crypto/aes_util.h"
#include "core/crypto/ctr_encryption_layer.h"
@@ -306,18 +305,18 @@ bool NCA::ReadRomFSSection(const NCASectionHeader& section, const NCASectionTabl
subsection_buckets.back().entries.push_back({section.bktr.relocation.offset, {0}, ctr_low});
subsection_buckets.back().entries.push_back({size, {0}, 0});
boost::optional<Core::Crypto::Key128> key = boost::none;
std::optional<Core::Crypto::Key128> key = {};
if (encrypted) {
if (has_rights_id) {
status = Loader::ResultStatus::Success;
key = GetTitlekey();
if (key == boost::none) {
if (!key) {
status = Loader::ResultStatus::ErrorMissingTitlekey;
return false;
}
} else {
key = GetKeyAreaKey(NCASectionCryptoType::BKTR);
if (key == boost::none) {
if (!key) {
status = Loader::ResultStatus::ErrorMissingKeyAreaKey;
return false;
}
@@ -332,7 +331,7 @@ bool NCA::ReadRomFSSection(const NCASectionHeader& section, const NCASectionTabl
auto bktr = std::make_shared<BKTR>(
bktr_base_romfs, std::make_shared<OffsetVfsFile>(file, romfs_size, base_offset),
relocation_block, relocation_buckets, subsection_block, subsection_buckets, encrypted,
encrypted ? key.get() : Core::Crypto::Key128{}, base_offset, bktr_base_ivfc_offset,
encrypted ? *key : Core::Crypto::Key128{}, base_offset, bktr_base_ivfc_offset,
section.raw.section_ctr);
// BKTR applies to entire IVFC, so make an offset version to level 6
@@ -388,11 +387,11 @@ u8 NCA::GetCryptoRevision() const {
return master_key_id;
}
boost::optional<Core::Crypto::Key128> NCA::GetKeyAreaKey(NCASectionCryptoType type) const {
std::optional<Core::Crypto::Key128> NCA::GetKeyAreaKey(NCASectionCryptoType type) const {
const auto master_key_id = GetCryptoRevision();
if (!keys.HasKey(Core::Crypto::S128KeyType::KeyArea, master_key_id, header.key_index))
return boost::none;
return {};
std::vector<u8> key_area(header.key_area.begin(), header.key_area.end());
Core::Crypto::AESCipher<Core::Crypto::Key128> cipher(
@@ -416,25 +415,25 @@ boost::optional<Core::Crypto::Key128> NCA::GetKeyAreaKey(NCASectionCryptoType ty
return out;
}
boost::optional<Core::Crypto::Key128> NCA::GetTitlekey() {
std::optional<Core::Crypto::Key128> NCA::GetTitlekey() {
const auto master_key_id = GetCryptoRevision();
u128 rights_id{};
memcpy(rights_id.data(), header.rights_id.data(), 16);
if (rights_id == u128{}) {
status = Loader::ResultStatus::ErrorInvalidRightsID;
return boost::none;
return {};
}
auto titlekey = keys.GetKey(Core::Crypto::S128KeyType::Titlekey, rights_id[1], rights_id[0]);
if (titlekey == Core::Crypto::Key128{}) {
status = Loader::ResultStatus::ErrorMissingTitlekey;
return boost::none;
return {};
}
if (!keys.HasKey(Core::Crypto::S128KeyType::Titlekek, master_key_id)) {
status = Loader::ResultStatus::ErrorMissingTitlekek;
return boost::none;
return {};
}
Core::Crypto::AESCipher<Core::Crypto::Key128> cipher(
@@ -458,25 +457,25 @@ VirtualFile NCA::Decrypt(const NCASectionHeader& s_header, VirtualFile in, u64 s
case NCASectionCryptoType::BKTR:
LOG_DEBUG(Crypto, "called with mode=CTR, starting_offset={:016X}", starting_offset);
{
boost::optional<Core::Crypto::Key128> key = boost::none;
std::optional<Core::Crypto::Key128> key = {};
if (has_rights_id) {
status = Loader::ResultStatus::Success;
key = GetTitlekey();
if (key == boost::none) {
if (!key) {
if (status == Loader::ResultStatus::Success)
status = Loader::ResultStatus::ErrorMissingTitlekey;
return nullptr;
}
} else {
key = GetKeyAreaKey(NCASectionCryptoType::CTR);
if (key == boost::none) {
if (!key) {
status = Loader::ResultStatus::ErrorMissingKeyAreaKey;
return nullptr;
}
}
auto out = std::make_shared<Core::Crypto::CTREncryptionLayer>(
std::move(in), key.value(), starting_offset);
auto out = std::make_shared<Core::Crypto::CTREncryptionLayer>(std::move(in), *key,
starting_offset);
std::vector<u8> iv(16);
for (u8 i = 0; i < 8; ++i)
iv[i] = s_header.raw.section_ctr[0x8 - i - 1];
@@ -546,7 +545,4 @@ u64 NCA::GetBaseIVFCOffset() const {
return ivfc_offset;
}
bool NCA::ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) {
return false;
}
} // namespace FileSys

View File

@@ -6,9 +6,10 @@
#include <array>
#include <memory>
#include <optional>
#include <string>
#include <vector>
#include <boost/optional.hpp>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/swap.h"
@@ -100,9 +101,6 @@ public:
// Returns the base ivfc offset used in BKTR patching.
u64 GetBaseIVFCOffset() const;
protected:
bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;
private:
bool CheckSupportedNCA(const NCAHeader& header);
bool HandlePotentialHeaderDecryption();
@@ -114,8 +112,8 @@ private:
bool ReadPFS0Section(const NCASectionHeader& section, const NCASectionTableEntry& entry);
u8 GetCryptoRevision() const;
boost::optional<Core::Crypto::Key128> GetKeyAreaKey(NCASectionCryptoType type) const;
boost::optional<Core::Crypto::Key128> GetTitlekey();
std::optional<Core::Crypto::Key128> GetKeyAreaKey(NCASectionCryptoType type) const;
std::optional<Core::Crypto::Key128> GetTitlekey();
VirtualFile Decrypt(const NCASectionHeader& header, VirtualFile in, u64 starting_offset);
std::vector<VirtualDir> dirs;

View File

@@ -27,7 +27,6 @@
#include <map>
#include <memory>
#include <string>
#include <boost/detail/container_fwd.hpp>
#include "common/common_types.h"
#include "core/file_sys/vfs.h"

View File

@@ -99,16 +99,16 @@ VirtualFile PatchIPS(const VirtualFile& in, const VirtualFile& ips) {
u16 rle_size{};
if (ips->ReadObject(&rle_size, offset) != sizeof(u16))
return nullptr;
rle_size = Common::swap16(data_size);
rle_size = Common::swap16(rle_size);
offset += sizeof(u16);
const auto data = ips->ReadByte(offset++);
if (data == boost::none)
if (!data)
return nullptr;
if (real_offset + rle_size > in_data.size())
rle_size = static_cast<u16>(in_data.size() - real_offset);
std::memset(in_data.data() + real_offset, data.get(), rle_size);
std::memset(in_data.data() + real_offset, *data, rle_size);
} else { // Standard Patch
auto read = data_size;
if (real_offset + read > in_data.size())

View File

@@ -83,7 +83,7 @@ std::vector<std::shared_ptr<VfsFile>> PartitionFilesystem::GetFiles() const {
}
std::vector<std::shared_ptr<VfsDirectory>> PartitionFilesystem::GetSubdirectories() const {
return pfs_dirs;
return {};
}
std::string PartitionFilesystem::GetName() const {
@@ -103,18 +103,4 @@ void PartitionFilesystem::PrintDebugInfo() const {
pfs_files[i]->GetName(), pfs_files[i]->GetSize());
}
}
bool PartitionFilesystem::ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) {
const auto iter = std::find(pfs_files.begin(), pfs_files.end(), file);
if (iter == pfs_files.end())
return false;
const std::ptrdiff_t offset = std::distance(pfs_files.begin(), iter);
pfs_files[offset] = std::move(pfs_files.back());
pfs_files.pop_back();
pfs_dirs.emplace_back(std::move(dir));
return true;
}
} // namespace FileSys

View File

@@ -35,9 +35,6 @@ public:
std::shared_ptr<VfsDirectory> GetParentDirectory() const override;
void PrintDebugInfo() const;
protected:
bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;
private:
struct Header {
u32_le magic;
@@ -84,7 +81,6 @@ private:
std::size_t content_offset = 0;
std::vector<VirtualFile> pfs_files;
std::vector<VirtualDir> pfs_dirs;
};
} // namespace FileSys

View File

@@ -61,13 +61,12 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
// Game Updates
const auto update_tid = GetUpdateTitleID(title_id);
const auto update = installed->GetEntry(update_tid, ContentRecordType::Program);
if (update != nullptr) {
if (update->GetStatus() == Loader::ResultStatus::ErrorMissingBKTRBaseRomFS &&
update->GetExeFS() != nullptr) {
LOG_INFO(Loader, " ExeFS: Update ({}) applied successfully",
FormatTitleVersion(installed->GetEntryVersion(update_tid).get_value_or(0)));
exefs = update->GetExeFS();
}
if (update != nullptr && update->GetExeFS() != nullptr &&
update->GetStatus() == Loader::ResultStatus::ErrorMissingBKTRBaseRomFS) {
LOG_INFO(Loader, " ExeFS: Update ({}) applied successfully",
FormatTitleVersion(installed->GetEntryVersion(update_tid).value_or(0)));
exefs = update->GetExeFS();
}
return exefs;
@@ -237,7 +236,7 @@ VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, Content
if (new_nca->GetStatus() == Loader::ResultStatus::Success &&
new_nca->GetRomFS() != nullptr) {
LOG_INFO(Loader, " RomFS: Update ({}) applied successfully",
FormatTitleVersion(installed->GetEntryVersion(update_tid).get_value_or(0)));
FormatTitleVersion(installed->GetEntryVersion(update_tid).value_or(0)));
romfs = new_nca->GetRomFS();
}
} else if (update_raw != nullptr) {
@@ -281,12 +280,11 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
} else {
if (installed->HasEntry(update_tid, ContentRecordType::Program)) {
const auto meta_ver = installed->GetEntryVersion(update_tid);
if (meta_ver == boost::none || meta_ver.get() == 0) {
if (meta_ver.value_or(0) == 0) {
out.insert_or_assign("Update", "");
} else {
out.insert_or_assign(
"Update",
FormatTitleVersion(meta_ver.get(), TitleVersionFormat::ThreeElements));
"Update", FormatTitleVersion(*meta_ver, TitleVersionFormat::ThreeElements));
}
} else if (update_raw != nullptr) {
out.insert_or_assign("Update", "PACKED");

View File

@@ -159,28 +159,28 @@ VirtualFile RegisteredCache::GetFileAtID(NcaID id) const {
return file;
}
static boost::optional<NcaID> CheckMapForContentRecord(
static std::optional<NcaID> CheckMapForContentRecord(
const boost::container::flat_map<u64, CNMT>& map, u64 title_id, ContentRecordType type) {
if (map.find(title_id) == map.end())
return boost::none;
return {};
const auto& cnmt = map.at(title_id);
const auto iter = std::find_if(cnmt.GetContentRecords().begin(), cnmt.GetContentRecords().end(),
[type](const ContentRecord& rec) { return rec.type == type; });
if (iter == cnmt.GetContentRecords().end())
return boost::none;
return {};
return boost::make_optional(iter->nca_id);
return std::make_optional(iter->nca_id);
}
boost::optional<NcaID> RegisteredCache::GetNcaIDFromMetadata(u64 title_id,
ContentRecordType type) const {
std::optional<NcaID> RegisteredCache::GetNcaIDFromMetadata(u64 title_id,
ContentRecordType type) const {
if (type == ContentRecordType::Meta && meta_id.find(title_id) != meta_id.end())
return meta_id.at(title_id);
const auto res1 = CheckMapForContentRecord(yuzu_meta, title_id, type);
if (res1 != boost::none)
if (res1)
return res1;
return CheckMapForContentRecord(meta, title_id, type);
}
@@ -283,17 +283,14 @@ bool RegisteredCache::HasEntry(RegisteredCacheEntry entry) const {
VirtualFile RegisteredCache::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
const auto id = GetNcaIDFromMetadata(title_id, type);
if (id == boost::none)
return nullptr;
return GetFileAtID(id.get());
return id ? GetFileAtID(*id) : nullptr;
}
VirtualFile RegisteredCache::GetEntryUnparsed(RegisteredCacheEntry entry) const {
return GetEntryUnparsed(entry.title_id, entry.type);
}
boost::optional<u32> RegisteredCache::GetEntryVersion(u64 title_id) const {
std::optional<u32> RegisteredCache::GetEntryVersion(u64 title_id) const {
const auto meta_iter = meta.find(title_id);
if (meta_iter != meta.end())
return meta_iter->second.GetTitleVersion();
@@ -302,15 +299,12 @@ boost::optional<u32> RegisteredCache::GetEntryVersion(u64 title_id) const {
if (yuzu_meta_iter != yuzu_meta.end())
return yuzu_meta_iter->second.GetTitleVersion();
return boost::none;
return {};
}
VirtualFile RegisteredCache::GetEntryRaw(u64 title_id, ContentRecordType type) const {
const auto id = GetNcaIDFromMetadata(title_id, type);
if (id == boost::none)
return nullptr;
return parser(GetFileAtID(id.get()), id.get());
return id ? parser(GetFileAtID(*id), *id) : nullptr;
}
VirtualFile RegisteredCache::GetEntryRaw(RegisteredCacheEntry entry) const {
@@ -364,8 +358,8 @@ std::vector<RegisteredCacheEntry> RegisteredCache::ListEntries() const {
}
std::vector<RegisteredCacheEntry> RegisteredCache::ListEntriesFilter(
boost::optional<TitleType> title_type, boost::optional<ContentRecordType> record_type,
boost::optional<u64> title_id) const {
std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
std::optional<u64> title_id) const {
std::vector<RegisteredCacheEntry> out;
IterateAllMetadata<RegisteredCacheEntry>(
out,
@@ -373,11 +367,11 @@ std::vector<RegisteredCacheEntry> RegisteredCache::ListEntriesFilter(
return RegisteredCacheEntry{c.GetTitleID(), r.type};
},
[&title_type, &record_type, &title_id](const CNMT& c, const ContentRecord& r) {
if (title_type != boost::none && title_type.get() != c.GetType())
if (title_type && *title_type != c.GetType())
return false;
if (record_type != boost::none && record_type.get() != r.type)
if (record_type && *record_type != r.type)
return false;
if (title_id != boost::none && title_id.get() != c.GetTitleID())
if (title_id && *title_id != c.GetTitleID())
return false;
return true;
});
@@ -459,7 +453,7 @@ InstallResult RegisteredCache::InstallEntry(std::shared_ptr<NCA> nca, TitleType
InstallResult RegisteredCache::RawInstallNCA(std::shared_ptr<NCA> nca, const VfsCopyFunction& copy,
bool overwrite_if_exists,
boost::optional<NcaID> override_id) {
std::optional<NcaID> override_id) {
const auto in = nca->GetBaseFile();
Core::Crypto::SHA256Hash hash{};
@@ -468,12 +462,12 @@ InstallResult RegisteredCache::RawInstallNCA(std::shared_ptr<NCA> nca, const Vfs
// game is massive), we're going to cheat and only hash the first MB of the NCA.
// Also, for XCIs the NcaID matters, so if the override id isn't none, use that.
NcaID id{};
if (override_id == boost::none) {
if (override_id) {
id = *override_id;
} else {
const auto& data = in->ReadBytes(0x100000);
mbedtls_sha256(data.data(), data.size(), hash.data(), 0);
memcpy(id.data(), hash.data(), 16);
} else {
id = override_id.get();
}
std::string path = GetRelativePathFromNcaID(id, false, true);
@@ -543,14 +537,14 @@ bool RegisteredCacheUnion::HasEntry(RegisteredCacheEntry entry) const {
return HasEntry(entry.title_id, entry.type);
}
boost::optional<u32> RegisteredCacheUnion::GetEntryVersion(u64 title_id) const {
std::optional<u32> RegisteredCacheUnion::GetEntryVersion(u64 title_id) const {
for (const auto& c : caches) {
const auto res = c->GetEntryVersion(title_id);
if (res != boost::none)
if (res)
return res;
}
return boost::none;
return {};
}
VirtualFile RegisteredCacheUnion::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
@@ -609,8 +603,8 @@ std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntries() const {
}
std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntriesFilter(
boost::optional<TitleType> title_type, boost::optional<ContentRecordType> record_type,
boost::optional<u64> title_id) const {
std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
std::optional<u64> title_id) const {
std::vector<RegisteredCacheEntry> out;
for (const auto& c : caches) {
c->IterateAllMetadata<RegisteredCacheEntry>(
@@ -619,11 +613,11 @@ std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntriesFilter(
return RegisteredCacheEntry{c.GetTitleID(), r.type};
},
[&title_type, &record_type, &title_id](const CNMT& c, const ContentRecord& r) {
if (title_type != boost::none && title_type.get() != c.GetType())
if (title_type && *title_type != c.GetType())
return false;
if (record_type != boost::none && record_type.get() != r.type)
if (record_type && *record_type != r.type)
return false;
if (title_id != boost::none && title_id.get() != c.GetTitleID())
if (title_id && *title_id != c.GetTitleID())
return false;
return true;
});

View File

@@ -84,7 +84,7 @@ public:
bool HasEntry(u64 title_id, ContentRecordType type) const;
bool HasEntry(RegisteredCacheEntry entry) const;
boost::optional<u32> GetEntryVersion(u64 title_id) const;
std::optional<u32> GetEntryVersion(u64 title_id) const;
VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const;
VirtualFile GetEntryUnparsed(RegisteredCacheEntry entry) const;
@@ -96,11 +96,10 @@ public:
std::unique_ptr<NCA> GetEntry(RegisteredCacheEntry entry) const;
std::vector<RegisteredCacheEntry> ListEntries() const;
// If a parameter is not boost::none, it will be filtered for from all entries.
// If a parameter is not std::nullopt, it will be filtered for from all entries.
std::vector<RegisteredCacheEntry> ListEntriesFilter(
boost::optional<TitleType> title_type = boost::none,
boost::optional<ContentRecordType> record_type = boost::none,
boost::optional<u64> title_id = boost::none) const;
std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {},
std::optional<u64> title_id = {}) const;
// Raw copies all the ncas from the xci/nsp to the csache. Does some quick checks to make sure
// there is a meta NCA and all of them are accessible.
@@ -125,12 +124,11 @@ private:
std::vector<NcaID> AccumulateFiles() const;
void ProcessFiles(const std::vector<NcaID>& ids);
void AccumulateYuzuMeta();
boost::optional<NcaID> GetNcaIDFromMetadata(u64 title_id, ContentRecordType type) const;
std::optional<NcaID> GetNcaIDFromMetadata(u64 title_id, ContentRecordType type) const;
VirtualFile GetFileAtID(NcaID id) const;
VirtualFile OpenFileOrDirectoryConcat(const VirtualDir& dir, std::string_view path) const;
InstallResult RawInstallNCA(std::shared_ptr<NCA> nca, const VfsCopyFunction& copy,
bool overwrite_if_exists,
boost::optional<NcaID> override_id = boost::none);
bool overwrite_if_exists, std::optional<NcaID> override_id = {});
bool RawInstallYuzuMeta(const CNMT& cnmt);
VirtualDir dir;
@@ -153,7 +151,7 @@ public:
bool HasEntry(u64 title_id, ContentRecordType type) const;
bool HasEntry(RegisteredCacheEntry entry) const;
boost::optional<u32> GetEntryVersion(u64 title_id) const;
std::optional<u32> GetEntryVersion(u64 title_id) const;
VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const;
VirtualFile GetEntryUnparsed(RegisteredCacheEntry entry) const;
@@ -165,11 +163,10 @@ public:
std::unique_ptr<NCA> GetEntry(RegisteredCacheEntry entry) const;
std::vector<RegisteredCacheEntry> ListEntries() const;
// If a parameter is not boost::none, it will be filtered for from all entries.
// If a parameter is not std::nullopt, it will be filtered for from all entries.
std::vector<RegisteredCacheEntry> ListEntriesFilter(
boost::optional<TitleType> title_type = boost::none,
boost::optional<ContentRecordType> record_type = boost::none,
boost::optional<u64> title_id = boost::none) const;
std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {},
std::optional<u64> title_id = {}) const;
private:
std::vector<RegisteredCache*> caches;

View File

@@ -205,10 +205,6 @@ VirtualDir NSP::GetParentDirectory() const {
return file->GetContainingDirectory();
}
bool NSP::ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) {
return false;
}
void NSP::InitializeExeFSAndRomFS(const std::vector<VirtualFile>& files) {
exefs = pfs;

View File

@@ -55,9 +55,6 @@ public:
VirtualDir GetParentDirectory() const override;
protected:
bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;
private:
void InitializeExeFSAndRomFS(const std::vector<VirtualFile>& files);
void ReadNCAs(const std::vector<VirtualFile>& files);

View File

@@ -167,13 +167,13 @@ std::string VfsFile::GetExtension() const {
VfsDirectory::~VfsDirectory() = default;
boost::optional<u8> VfsFile::ReadByte(std::size_t offset) const {
std::optional<u8> VfsFile::ReadByte(std::size_t offset) const {
u8 out{};
std::size_t size = Read(&out, 1, offset);
if (size == 1)
return out;
return boost::none;
return {};
}
std::vector<u8> VfsFile::ReadBytes(std::size_t size, std::size_t offset) const {

View File

@@ -4,13 +4,15 @@
#pragma once
#include <functional>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <type_traits>
#include <vector>
#include <boost/optional.hpp>
#include "common/common_types.h"
#include "core/file_sys/vfs_types.h"
@@ -103,8 +105,8 @@ public:
// into file. Returns number of bytes successfully written.
virtual std::size_t Write(const u8* data, std::size_t length, std::size_t offset = 0) = 0;
// Reads exactly one byte at the offset provided, returning boost::none on error.
virtual boost::optional<u8> ReadByte(std::size_t offset = 0) const;
// Reads exactly one byte at the offset provided, returning std::nullopt on error.
virtual std::optional<u8> ReadByte(std::size_t offset = 0) const;
// Reads size bytes starting at offset in file into a vector.
virtual std::vector<u8> ReadBytes(std::size_t size, std::size_t offset = 0) const;
// Reads all the bytes from the file into a vector. Equivalent to 'file->Read(file->GetSize(),
@@ -262,36 +264,8 @@ public:
// item name -> type.
virtual std::map<std::string, VfsEntryType, std::less<>> GetEntries() const;
// Interprets the file with name file instead as a directory of type directory.
// The directory must have a constructor that takes a single argument of type
// std::shared_ptr<VfsFile>. Allows to reinterpret container files (i.e NCA, zip, XCI, etc) as a
// subdirectory in one call.
template <typename Directory>
bool InterpretAsDirectory(std::string_view file) {
auto file_p = GetFile(file);
if (file_p == nullptr) {
return false;
}
return ReplaceFileWithSubdirectory(file_p, std::make_shared<Directory>(file_p));
}
bool InterpretAsDirectory(const std::function<VirtualDir(VirtualFile)>& function,
const std::string& file) {
auto file_p = GetFile(file);
if (file_p == nullptr)
return false;
return ReplaceFileWithSubdirectory(file_p, function(file_p));
}
// Returns the full path of this directory as a string, recursively
virtual std::string GetFullPath() const;
protected:
// Backend for InterpretAsDirectory.
// Removes all references to file and adds a reference to dir in the directory's implementation.
virtual bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) = 0;
};
// A convenience partial-implementation of VfsDirectory that stubs out methods that should only work
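
Note on the boost::optional → std::optional migration in this header: below is a minimal standalone sketch (not yuzu code; names are illustrative) of the ReadByte calling pattern, where the function returns the byte on success and an empty optional on error, and the caller tests the optional before dereferencing.

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

// Illustrative stand-in for a VfsFile-style byte source.
std::optional<std::uint8_t> ReadByteAt(const std::vector<std::uint8_t>& data, std::size_t offset) {
    if (offset < data.size()) {
        return data[offset];
    }
    return std::nullopt; // same meaning as the `return {};` used in the diff
}

int main() {
    const std::vector<std::uint8_t> bytes{0xDE, 0xAD, 0xBE, 0xEF};
    if (const auto byte = ReadByteAt(bytes, 2)) {
        // *byte == 0xBE; std::optional converts to bool, so no explicit has_value() is needed.
    }
    return 0;
}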


@@ -126,7 +126,4 @@ bool LayeredVfsDirectory::Rename(std::string_view name_) {
return true;
}
bool LayeredVfsDirectory::ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) {
return false;
}
} // namespace FileSys


@@ -39,9 +39,6 @@ public:
bool DeleteFile(std::string_view name) override;
bool Rename(std::string_view name) override;
protected:
bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;
private:
std::vector<VirtualDir> dirs;
std::string name;


@@ -57,11 +57,11 @@ std::size_t OffsetVfsFile::Write(const u8* data, std::size_t length, std::size_t
return file->Write(data, TrimToFit(length, r_offset), offset + r_offset);
}
boost::optional<u8> OffsetVfsFile::ReadByte(std::size_t r_offset) const {
std::optional<u8> OffsetVfsFile::ReadByte(std::size_t r_offset) const {
if (r_offset < size)
return file->ReadByte(offset + r_offset);
return boost::none;
return {};
}
std::vector<u8> OffsetVfsFile::ReadBytes(std::size_t r_size, std::size_t r_offset) const {


@@ -29,7 +29,7 @@ public:
bool IsReadable() const override;
std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override;
boost::optional<u8> ReadByte(std::size_t offset) const override;
std::optional<u8> ReadByte(std::size_t offset) const override;
std::vector<u8> ReadBytes(std::size_t size, std::size_t offset) const override;
std::vector<u8> ReadAllBytes() const override;
bool WriteByte(u8 data, std::size_t offset) override;


@@ -430,7 +430,4 @@ std::map<std::string, VfsEntryType, std::less<>> RealVfsDirectory::GetEntries()
return out;
}
bool RealVfsDirectory::ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) {
return false;
}
} // namespace FileSys


@@ -100,9 +100,6 @@ public:
std::string GetFullPath() const override;
std::map<std::string, VfsEntryType, std::less<>> GetEntries() const override;
protected:
bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;
private:
RealVfsDirectory(RealVfsFilesystem& base, const std::string& path, Mode perms = Mode::Read);


@@ -53,10 +53,10 @@ public:
return 0;
}
boost::optional<u8> ReadByte(std::size_t offset) const override {
std::optional<u8> ReadByte(std::size_t offset) const override {
if (offset < size)
return value;
return boost::none;
return {};
}
std::vector<u8> ReadBytes(std::size_t length, std::size_t offset) const override {


@@ -132,11 +132,4 @@ void VectorVfsDirectory::AddFile(VirtualFile file) {
void VectorVfsDirectory::AddDirectory(VirtualDir dir) {
dirs.push_back(std::move(dir));
}
bool VectorVfsDirectory::ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) {
if (!DeleteFile(file->GetName()))
return false;
dirs.emplace_back(std::move(dir));
return true;
}
} // namespace FileSys


@@ -57,9 +57,6 @@ public:
virtual void AddFile(VirtualFile file);
virtual void AddDirectory(VirtualDir dir);
protected:
bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;
private:
std::vector<VirtualFile> files;
std::vector<VirtualDir> dirs;


@@ -163,7 +163,4 @@ std::shared_ptr<VfsDirectory> NAX::GetParentDirectory() const {
return file->GetContainingDirectory();
}
bool NAX::ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) {
return false;
}
} // namespace FileSys


@@ -51,9 +51,6 @@ public:
std::shared_ptr<VfsDirectory> GetParentDirectory() const override;
protected:
bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;
private:
Loader::ResultStatus Parse(std::string_view path);


@@ -117,8 +117,7 @@ public:
AlignWithPadding();
const bool request_has_domain_header{context.GetDomainMessageHeader() != nullptr};
if (context.Session()->IsDomain() && request_has_domain_header) {
if (context.Session()->IsDomain() && context.HasDomainMessageHeader()) {
IPC::DomainMessageHeader domain_header{};
domain_header.num_objects = num_domain_objects;
PushRaw(domain_header);


@@ -161,8 +161,12 @@ public:
return buffer_c_desciptors;
}
const std::shared_ptr<IPC::DomainMessageHeader>& GetDomainMessageHeader() const {
return domain_message_header;
const IPC::DomainMessageHeader* GetDomainMessageHeader() const {
return domain_message_header.get();
}
bool HasDomainMessageHeader() const {
return domain_message_header != nullptr;
}
/// Helper function to read a buffer using the appropriate buffer descriptor
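
For context, the change above swaps a shared_ptr accessor for a non-owning pointer plus an explicit presence check. A minimal standalone sketch of that pattern (illustrative names, not the actual HLERequestContext API):

#include <memory>

struct DomainMessageHeader {
    unsigned num_objects = 0;
};

class RequestContext {
public:
    // Callers only observe the header, so hand out a non-owning pointer...
    const DomainMessageHeader* GetDomainMessageHeader() const {
        return domain_message_header.get();
    }
    // ...and expose presence as a dedicated query instead of null-checking a shared_ptr.
    bool HasDomainMessageHeader() const {
        return domain_message_header != nullptr;
    }

private:
    std::shared_ptr<DomainMessageHeader> domain_message_header;
};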


@@ -32,7 +32,7 @@ namespace Kernel {
*/
static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_late) {
const auto proper_handle = static_cast<Handle>(thread_handle);
auto& system = Core::System::GetInstance();
const auto& system = Core::System::GetInstance();
// Lock the global kernel mutex when we enter the kernel HLE.
std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
@@ -90,7 +90,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
/// The timer callback event, called when a timer is fired
static void TimerCallback(u64 timer_handle, int cycles_late) {
const auto proper_handle = static_cast<Handle>(timer_handle);
auto& system = Core::System::GetInstance();
const auto& system = Core::System::GetInstance();
SharedPtr<Timer> timer = system.Kernel().RetrieveTimerFromCallbackHandleTable(proper_handle);
if (timer == nullptr) {


@@ -6,8 +6,6 @@
#include <utility>
#include <vector>
#include <boost/range/algorithm_ext/erase.hpp>
#include "common/assert.h"
#include "core/core.h"
#include "core/hle/kernel/errors.h"


@@ -232,6 +232,12 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::CodeStatic);
MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeMutable);
MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeMutable);
// Clear instruction cache in CPU JIT
Core::System::GetInstance().ArmInterface(0).ClearInstructionCache();
Core::System::GetInstance().ArmInterface(1).ClearInstructionCache();
Core::System::GetInstance().ArmInterface(2).ClearInstructionCache();
Core::System::GetInstance().ArmInterface(3).ClearInstructionCache();
}
ResultVal<VAddr> Process::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {


@@ -202,6 +202,16 @@ public:
return is_64bit_process;
}
/// Gets the total running time of the process instance in ticks.
u64 GetCPUTimeTicks() const {
return total_process_running_time_ticks;
}
/// Updates the total running time, adding the given ticks to it.
void UpdateCPUTimeTicks(u64 ticks) {
total_process_running_time_ticks += ticks;
}
/**
* Loads process-specifics configuration info with metadata provided
* by an executable.
@@ -305,6 +315,9 @@ private:
/// specified by metadata provided to the process during loading.
bool is_64bit_process = true;
/// Total running time for the process in ticks.
u64 total_process_running_time_ticks = 0;
/// Per-process handle table for storing created object handles in.
HandleTable handle_table;


@@ -9,6 +9,7 @@
#include "common/logging/log.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/scheduler.h"
@@ -34,6 +35,10 @@ Thread* Scheduler::GetCurrentThread() const {
return current_thread.get();
}
u64 Scheduler::GetLastContextSwitchTicks() const {
return last_context_switch_time;
}
Thread* Scheduler::PopNextReadyThread() {
Thread* next = nullptr;
Thread* thread = GetCurrentThread();
@@ -54,7 +59,10 @@ Thread* Scheduler::PopNextReadyThread() {
}
void Scheduler::SwitchContext(Thread* new_thread) {
Thread* previous_thread = GetCurrentThread();
Thread* const previous_thread = GetCurrentThread();
Process* const previous_process = Core::CurrentProcess();
UpdateLastContextSwitchTime(previous_thread, previous_process);
// Save context for previous thread
if (previous_thread) {
@@ -78,8 +86,6 @@ void Scheduler::SwitchContext(Thread* new_thread) {
// Cancel any outstanding wakeup events for this thread
new_thread->CancelWakeupTimer();
auto* const previous_process = Core::CurrentProcess();
current_thread = new_thread;
ready_queue.remove(new_thread->GetPriority(), new_thread);
@@ -102,6 +108,22 @@ void Scheduler::SwitchContext(Thread* new_thread) {
}
}
void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
const u64 prev_switch_ticks = last_context_switch_time;
const u64 most_recent_switch_ticks = CoreTiming::GetTicks();
const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
if (thread != nullptr) {
thread->UpdateCPUTimeTicks(update_ticks);
}
if (process != nullptr) {
process->UpdateCPUTimeTicks(update_ticks);
}
last_context_switch_time = most_recent_switch_ticks;
}
void Scheduler::Reschedule() {
std::lock_guard<std::mutex> lock(scheduler_mutex);


@@ -17,6 +17,8 @@ class ARM_Interface;
namespace Kernel {
class Process;
class Scheduler final {
public:
explicit Scheduler(Core::ARM_Interface& cpu_core);
@@ -31,6 +33,9 @@ public:
/// Gets the current running thread
Thread* GetCurrentThread() const;
/// Gets the timestamp for the last context switch in ticks.
u64 GetLastContextSwitchTicks() const;
/// Adds a new thread to the scheduler
void AddThread(SharedPtr<Thread> thread, u32 priority);
@@ -64,6 +69,19 @@ private:
*/
void SwitchContext(Thread* new_thread);
/**
* Called on every context switch to update the internal timestamp
* This also updates the running time ticks for the given thread and
* process using the following difference:
*
* ticks += most_recent_ticks - last_context_switch_ticks
*
* The internal tick timestamp for the scheduler is simply the
* most recent tick count retrieved. No special arithmetic is
* applied to it.
*/
void UpdateLastContextSwitchTime(Thread* thread, Process* process);
/// Lists all thread ids that aren't deleted/etc.
std::vector<SharedPtr<Thread>> thread_list;
@@ -73,6 +91,7 @@ private:
SharedPtr<Thread> current_thread = nullptr;
Core::ARM_Interface& cpu_core;
u64 last_context_switch_time = 0;
static std::mutex scheduler_mutex;
};
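
The running-time bookkeeping added above boils down to simple tick arithmetic: on every context switch the elapsed ticks since the previous switch are credited to the outgoing thread and its process, and a live query adds the not-yet-credited remainder. A minimal standalone sketch of that arithmetic (illustrative types, not the kernel classes):

#include <cstdint>

struct TickAccount {
    std::uint64_t total_ticks = 0;
    void Credit(std::uint64_t ticks) {
        total_ticks += ticks;
    }
};

struct SchedulerTicks {
    std::uint64_t last_context_switch_time = 0;

    // Mirrors UpdateLastContextSwitchTime: ticks += most_recent_ticks - last_context_switch_ticks.
    void OnContextSwitch(std::uint64_t now, TickAccount* thread, TickAccount* process) {
        const std::uint64_t elapsed = now - last_context_switch_time;
        if (thread != nullptr) {
            thread->Credit(elapsed);
        }
        if (process != nullptr) {
            process->Credit(elapsed);
        }
        last_context_switch_time = now;
    }

    // Mirrors the svcGetInfo ThreadTickCount query for the currently running thread:
    // already-credited ticks plus the partial slice since the last switch.
    std::uint64_t RunningThreadTicks(std::uint64_t now, const TickAccount& thread) const {
        return thread.total_ticks + (now - last_context_switch_time);
    }
};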


@@ -63,7 +63,7 @@ void ServerSession::Acquire(Thread* thread) {
}
ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
auto& domain_message_header = context.GetDomainMessageHeader();
auto* const domain_message_header = context.GetDomainMessageHeader();
if (domain_message_header) {
// Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
context.SetDomainRequestHandlers(domain_request_handlers);
@@ -111,7 +111,7 @@ ResultCode ServerSession::HandleSyncRequest(SharedPtr<Thread> thread) {
ResultCode result = RESULT_SUCCESS;
// If the session has been converted to a domain, handle the domain request
if (IsDomain() && context.GetDomainMessageHeader()) {
if (IsDomain() && context.HasDomainMessageHeader()) {
result = HandleDomainSyncRequest(context);
// If there is no domain header, the regular session handler is used
} else if (hle_handler != nullptr) {


@@ -395,16 +395,42 @@ struct BreakReason {
/// Break program execution
static void Break(u32 reason, u64 info1, u64 info2) {
BreakReason break_reason{reason};
bool has_dumped_buffer{};
const auto handle_debug_buffer = [&](VAddr addr, u64 sz) {
if (sz == 0 || addr == 0 || has_dumped_buffer) {
return;
}
// This typically is an error code so we're going to assume this is the case
if (sz == sizeof(u32)) {
LOG_CRITICAL(Debug_Emulated, "debug_buffer_err_code={:X}", Memory::Read32(addr));
} else {
// We don't know what's in here so we'll hexdump it
std::vector<u8> debug_buffer(sz);
Memory::ReadBlock(addr, debug_buffer.data(), sz);
std::string hexdump;
for (std::size_t i = 0; i < debug_buffer.size(); i++) {
hexdump += fmt::format("{:02X} ", debug_buffer[i]);
if (i != 0 && i % 16 == 0) {
hexdump += '\n';
}
}
LOG_CRITICAL(Debug_Emulated, "debug_buffer=\n{}", hexdump);
}
has_dumped_buffer = true;
};
switch (break_reason.break_type) {
case BreakType::Panic:
LOG_CRITICAL(Debug_Emulated, "Signalling debugger, PANIC! info1=0x{:016X}, info2=0x{:016X}",
info1, info2);
handle_debug_buffer(info1, info2);
break;
case BreakType::AssertionFailed:
LOG_CRITICAL(Debug_Emulated,
"Signalling debugger, Assertion failed! info1=0x{:016X}, info2=0x{:016X}",
info1, info2);
handle_debug_buffer(info1, info2);
break;
case BreakType::PreNROLoad:
LOG_WARNING(
@@ -433,6 +459,7 @@ static void Break(u32 reason, u64 info1, u64 info2) {
Debug_Emulated,
"Signalling debugger, Unknown break reason {}, info1=0x{:016X}, info2=0x{:016X}",
static_cast<u32>(break_reason.break_type.Value()), info1, info2);
handle_debug_buffer(info1, info2);
break;
}
@@ -441,6 +468,7 @@ static void Break(u32 reason, u64 info1, u64 info2) {
Debug_Emulated,
"Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}",
reason, info1, info2);
handle_debug_buffer(info1, info2);
ASSERT(false);
Core::CurrentProcess()->PrepareForTermination();
@@ -467,6 +495,37 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id,
info_sub_id, handle);
enum class GetInfoType : u64 {
// 1.0.0+
AllowedCpuIdBitmask = 0,
AllowedThreadPrioBitmask = 1,
MapRegionBaseAddr = 2,
MapRegionSize = 3,
HeapRegionBaseAddr = 4,
HeapRegionSize = 5,
TotalMemoryUsage = 6,
TotalHeapUsage = 7,
IsCurrentProcessBeingDebugged = 8,
ResourceHandleLimit = 9,
IdleTickCount = 10,
RandomEntropy = 11,
PerformanceCounter = 0xF0000002,
// 2.0.0+
ASLRRegionBaseAddr = 12,
ASLRRegionSize = 13,
NewMapRegionBaseAddr = 14,
NewMapRegionSize = 15,
// 3.0.0+
IsVirtualAddressMemoryEnabled = 16,
PersonalMmHeapUsage = 17,
TitleId = 18,
// 4.0.0+
PrivilegedProcessId = 19,
// 5.0.0+
UserExceptionContextAddr = 20,
ThreadTickCount = 0xF0000002,
};
const auto* current_process = Core::CurrentProcess();
const auto& vm_manager = current_process->VMManager();
@@ -529,6 +588,36 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
"(STUBBED) Attempted to query user exception context address, returned 0");
*result = 0;
break;
case GetInfoType::ThreadTickCount: {
constexpr u64 num_cpus = 4;
if (info_sub_id != 0xFFFFFFFFFFFFFFFF && info_sub_id >= num_cpus) {
return ERR_INVALID_COMBINATION_KERNEL;
}
const auto thread =
current_process->GetHandleTable().Get<Thread>(static_cast<Handle>(handle));
if (!thread) {
return ERR_INVALID_HANDLE;
}
const auto& system = Core::System::GetInstance();
const auto& scheduler = system.CurrentScheduler();
const auto* const current_thread = scheduler.GetCurrentThread();
const bool same_thread = current_thread == thread;
const u64 prev_ctx_ticks = scheduler.GetLastContextSwitchTicks();
u64 out_ticks = 0;
if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
out_ticks = thread_ticks + (CoreTiming::GetTicks() - prev_ctx_ticks);
} else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
out_ticks = CoreTiming::GetTicks() - prev_ctx_ticks;
}
*result = out_ticks;
break;
}
default:
UNIMPLEMENTED();
}
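
The handle_debug_buffer helper in svcBreak above either logs a 4-byte value as an error code or hex-dumps the buffer. Below is a minimal standalone sketch of the 16-bytes-per-row formatting step, assuming the fmt library used throughout the code above.

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

#include <fmt/format.h> // assumption: fmt is available, as in the code above

// Formats a buffer as space-separated hex bytes, 16 per row.
std::string HexDump(const std::vector<std::uint8_t>& buffer) {
    std::string out;
    for (std::size_t i = 0; i < buffer.size(); ++i) {
        out += fmt::format("{:02X} ", buffer[i]);
        if (i % 16 == 15) {
            out += '\n';
        }
    }
    return out;
}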


@@ -24,37 +24,6 @@ struct PageInfo {
u64 flags;
};
/// Values accepted by svcGetInfo
enum class GetInfoType : u64 {
// 1.0.0+
AllowedCpuIdBitmask = 0,
AllowedThreadPrioBitmask = 1,
MapRegionBaseAddr = 2,
MapRegionSize = 3,
HeapRegionBaseAddr = 4,
HeapRegionSize = 5,
TotalMemoryUsage = 6,
TotalHeapUsage = 7,
IsCurrentProcessBeingDebugged = 8,
ResourceHandleLimit = 9,
IdleTickCount = 10,
RandomEntropy = 11,
PerformanceCounter = 0xF0000002,
// 2.0.0+
ASLRRegionBaseAddr = 12,
ASLRRegionSize = 13,
NewMapRegionBaseAddr = 14,
NewMapRegionSize = 15,
// 3.0.0+
IsVirtualAddressMemoryEnabled = 16,
PersonalMmHeapUsage = 17,
TitleId = 18,
// 4.0.0+
PrivilegedProcessId = 19,
// 5.0.0+
UserExceptionContextAddr = 20,
};
void CallSVC(u32 immediate);
} // namespace Kernel


@@ -4,9 +4,9 @@
#include <algorithm>
#include <cinttypes>
#include <optional>
#include <vector>
#include <boost/optional.hpp>
#include <boost/range/algorithm_ext/erase.hpp>
#include "common/assert.h"
@@ -94,7 +94,7 @@ void Thread::CancelWakeupTimer() {
CoreTiming::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(), callback_handle);
}
static boost::optional<s32> GetNextProcessorId(u64 mask) {
static std::optional<s32> GetNextProcessorId(u64 mask) {
for (s32 index = 0; index < Core::NUM_CPU_CORES; ++index) {
if (mask & (1ULL << index)) {
if (!Core::System::GetInstance().Scheduler(index).GetCurrentThread()) {
@@ -142,7 +142,7 @@ void Thread::ResumeFromWait() {
status = ThreadStatus::Ready;
boost::optional<s32> new_processor_id = GetNextProcessorId(affinity_mask);
std::optional<s32> new_processor_id = GetNextProcessorId(affinity_mask);
if (!new_processor_id) {
new_processor_id = processor_id;
}
@@ -369,7 +369,7 @@ void Thread::ChangeCore(u32 core, u64 mask) {
return;
}
boost::optional<s32> new_processor_id{GetNextProcessorId(affinity_mask)};
std::optional<s32> new_processor_id{GetNextProcessorId(affinity_mask)};
if (!new_processor_id) {
new_processor_id = processor_id;


@@ -258,6 +258,14 @@ public:
return last_running_ticks;
}
u64 GetTotalCPUTimeTicks() const {
return total_cpu_time_ticks;
}
void UpdateCPUTimeTicks(u64 ticks) {
total_cpu_time_ticks += ticks;
}
s32 GetProcessorID() const {
return processor_id;
}
@@ -378,7 +386,8 @@ private:
u32 nominal_priority = 0; ///< Nominal thread priority, as set by the emulated application
u32 current_priority = 0; ///< Current thread priority, can be temporarily changed
u64 last_running_ticks = 0; ///< CPU tick when thread was last running
u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
u64 last_running_ticks = 0; ///< CPU tick when thread was last running
s32 processor_id = 0;


@@ -143,6 +143,26 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
return MakeResult<VMAHandle>(MergeAdjacent(vma_handle));
}
ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {
// Find the first Free VMA.
const VAddr base = GetASLRRegionBaseAddress();
const VMAHandle vma_handle = std::find_if(vma_map.begin(), vma_map.end(), [&](const auto& vma) {
if (vma.second.type != VMAType::Free)
return false;
const VAddr vma_end = vma.second.base + vma.second.size;
return vma_end > base && vma_end >= base + size;
});
if (vma_handle == vma_map.end()) {
// TODO(Subv): Find the correct error code here.
return ResultCode(-1);
}
const VAddr target = std::max(base, vma_handle->second.base);
return MakeResult<VAddr>(target);
}
ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size,
MemoryState state,
Memory::MemoryHookPointer mmio_handler) {
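
FindFreeRegion above scans the address-ordered VMA map for the first free VMA whose end can hold the requested size at or above the ASLR base, then places the region at the higher of the two bases. A simplified standalone sketch of that scan (illustrative Vma struct, not the real VMManager types):

#include <algorithm>
#include <cstdint>
#include <map>
#include <optional>

struct Vma {
    std::uint64_t base;
    std::uint64_t size;
    bool is_free;
};

// Returns the first address at or above search_base where a free VMA can hold `size` bytes.
std::optional<std::uint64_t> FindFreeRegion(const std::map<std::uint64_t, Vma>& vma_map,
                                            std::uint64_t search_base, std::uint64_t size) {
    for (const auto& [key, vma] : vma_map) {
        if (!vma.is_free) {
            continue;
        }
        const std::uint64_t vma_end = vma.base + vma.size;
        if (vma_end > search_base && vma_end >= search_base + size) {
            return std::max(search_base, vma.base);
        }
    }
    return std::nullopt; // stands in for the "correct error code" TODO above
}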


@@ -157,6 +157,14 @@ public:
*/
ResultVal<VMAHandle> MapBackingMemory(VAddr target, u8* memory, u64 size, MemoryState state);
/**
* Finds the first free address that can hold a region of the desired size.
*
* @param size Size of the desired region.
* @return The found free address.
*/
ResultVal<VAddr> FindFreeRegion(u64 size) const;
/**
* Maps a memory-mapped IO region at a given address.
*


@@ -242,6 +242,28 @@ void Module::Interface::GetBaasAccountManagerForApplication(Kernel::HLERequestCo
LOG_DEBUG(Service_ACC, "called");
}
void Module::Interface::TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called");
// A u8 is passed into this function which we can safely ignore. By the looks of it, it determines
// whether we have access to use the network or not.
IPC::ResponseBuilder rb{ctx, 6};
if (profile_manager->GetUserCount() != 1) {
rb.Push(RESULT_SUCCESS);
rb.PushRaw<u128>(INVALID_UUID);
return;
}
auto user_list = profile_manager->GetAllUsers();
if (user_list.empty()) {
rb.Push(ResultCode(-1)); // TODO(ogniK): Find the correct error code
rb.PushRaw<u128>(INVALID_UUID);
return;
}
// Select the first user we have
rb.Push(RESULT_SUCCESS);
rb.PushRaw<u128>(profile_manager->GetUser(0)->uuid);
}
Module::Interface::Interface(std::shared_ptr<Module> module,
std::shared_ptr<ProfileManager> profile_manager, const char* name)
: ServiceFramework(name), module(std::move(module)),
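
The selection rule implemented above is: only auto-select when exactly one profile exists, otherwise report that no user was chosen. A minimal standalone sketch of that rule (illustrative Uuid type, not the ProfileManager API):

#include <array>
#include <cstdint>
#include <optional>
#include <vector>

using Uuid = std::array<std::uint64_t, 2>; // stand-in for the u128 UUID pushed over IPC

// Returns the single user if exactly one exists; otherwise no user is selected.
std::optional<Uuid> TrySelectSingleUser(const std::vector<Uuid>& users) {
    if (users.size() != 1) {
        return std::nullopt;
    }
    return users.front();
}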


@@ -27,6 +27,7 @@ public:
void InitializeApplicationInfo(Kernel::HLERequestContext& ctx);
void GetBaasAccountManagerForApplication(Kernel::HLERequestContext& ctx);
void IsUserRegistrationRequestPermitted(Kernel::HLERequestContext& ctx);
void TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx);
protected:
std::shared_ptr<Module> module;


@@ -17,7 +17,7 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
{5, &ACC_SU::GetProfile, "GetProfile"},
{6, nullptr, "GetProfileDigest"},
{50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
{51, nullptr, "TrySelectUserWithoutInteraction"},
{51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
{60, nullptr, "ListOpenContextStoredUsers"},
{100, nullptr, "GetUserRegistrationNotifier"},
{101, nullptr, "GetUserStateChangeNotifier"},


@@ -17,7 +17,7 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
{5, &ACC_U0::GetProfile, "GetProfile"},
{6, nullptr, "GetProfileDigest"},
{50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
{51, nullptr, "TrySelectUserWithoutInteraction"},
{51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
{60, nullptr, "ListOpenContextStoredUsers"},
{100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"},
{101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"},


@@ -17,7 +17,7 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
{5, &ACC_U1::GetProfile, "GetProfile"},
{6, nullptr, "GetProfileDigest"},
{50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
{51, nullptr, "TrySelectUserWithoutInteraction"},
{51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
{60, nullptr, "ListOpenContextStoredUsers"},
{100, nullptr, "GetUserRegistrationNotifier"},
{101, nullptr, "GetUserStateChangeNotifier"},


@@ -195,7 +195,7 @@ std::size_t ProfileManager::GetOpenUserCount() const {
/// Checks if a user id exists in our profile manager
bool ProfileManager::UserExists(UUID uuid) const {
return GetUserIndex(uuid) != std::nullopt;
return GetUserIndex(uuid).has_value();
}
bool ProfileManager::UserExistsIndex(std::size_t index) const {


@@ -57,7 +57,8 @@ struct UUID {
};
static_assert(sizeof(UUID) == 16, "UUID is an invalid size!");
using ProfileUsername = std::array<u8, 0x20>;
constexpr std::size_t profile_username_size = 32;
using ProfileUsername = std::array<u8, profile_username_size>;
using ProfileData = std::array<u8, MAX_DATA>;
using UserIDArray = std::array<UUID, MAX_USERS>;


@@ -743,7 +743,7 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) {
Account::ProfileManager profile_manager{};
const auto uuid = profile_manager.GetUser(Settings::values.current_user);
ASSERT(uuid != std::nullopt);
ASSERT(uuid);
params.current_user = uuid->uuid;
IPC::ResponseBuilder rb{ctx, 2, 0, 1};


@@ -161,7 +161,7 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
std::size_t worker_sz = WorkerBufferSize(channel_count);
ASSERT_MSG(buffer_sz < worker_sz, "Worker buffer too large");
ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too small");
std::unique_ptr<OpusDecoder, OpusDeleter> decoder{
static_cast<OpusDecoder*>(operator new(worker_sz))};
if (opus_decoder_init(decoder.get(), sample_rate, channel_count)) {


@@ -273,8 +273,8 @@ public:
{0, &IFileSystem::CreateFile, "CreateFile"},
{1, &IFileSystem::DeleteFile, "DeleteFile"},
{2, &IFileSystem::CreateDirectory, "CreateDirectory"},
{3, nullptr, "DeleteDirectory"},
{4, nullptr, "DeleteDirectoryRecursively"},
{3, &IFileSystem::DeleteDirectory, "DeleteDirectory"},
{4, &IFileSystem::DeleteDirectoryRecursively, "DeleteDirectoryRecursively"},
{5, &IFileSystem::RenameFile, "RenameFile"},
{6, nullptr, "RenameDirectory"},
{7, &IFileSystem::GetEntryType, "GetEntryType"},
@@ -329,6 +329,30 @@ public:
rb.Push(backend.CreateDirectory(name));
}
void DeleteDirectory(Kernel::HLERequestContext& ctx) {
const IPC::RequestParser rp{ctx};
const auto file_buffer = ctx.ReadBuffer();
std::string name = Common::StringFromBuffer(file_buffer);
LOG_DEBUG(Service_FS, "called directory {}", name);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.DeleteDirectory(name));
}
void DeleteDirectoryRecursively(Kernel::HLERequestContext& ctx) {
const IPC::RequestParser rp{ctx};
const auto file_buffer = ctx.ReadBuffer();
std::string name = Common::StringFromBuffer(file_buffer);
LOG_DEBUG(Service_FS, "called directory {}", name);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.DeleteDirectoryRecursively(name));
}
void RenameFile(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};


@@ -392,8 +392,10 @@ std::size_t Controller_NPad::GetSupportedNPadIdTypesSize() const {
}
void Controller_NPad::SetHoldType(NpadHoldType joy_hold_type) {
styleset_changed_event->Signal();
hold_type = joy_hold_type;
}
Controller_NPad::NpadHoldType Controller_NPad::GetHoldType() const {
return hold_type;
}
@@ -427,6 +429,9 @@ void Controller_NPad::VibrateController(const std::vector<u32>& controller_ids,
}
Kernel::SharedPtr<Kernel::Event> Controller_NPad::GetStyleSetChangedEvent() const {
// TODO(ogniK): Figure out the best time to signal this event. It seems this event should be
// signaled at least once, and signaled again after a new controller is connected?
styleset_changed_event->Signal();
return styleset_changed_event;
}


@@ -96,6 +96,8 @@ public:
// TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)
CoreTiming::ScheduleEvent(pad_update_ticks, pad_update_event);
ReloadInputDevices();
}
void ActivateController(HidController controller) {


@@ -3,9 +3,13 @@
// Refer to the license.txt file included.
#include <memory>
#include <fmt/format.h>
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/process.h"
#include "core/hle/service/ldr/ldr.h"
#include "core/hle/service/service.h"
#include "core/loader/nro.h"
namespace Service::LDR {
@@ -59,16 +63,58 @@ public:
explicit RelocatableObject() : ServiceFramework{"ldr:ro"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "LoadNro"},
{0, &RelocatableObject::LoadNro, "LoadNro"},
{1, nullptr, "UnloadNro"},
{2, nullptr, "LoadNrr"},
{2, &RelocatableObject::LoadNrr, "LoadNrr"},
{3, nullptr, "UnloadNrr"},
{4, nullptr, "Initialize"},
{4, &RelocatableObject::Initialize, "Initialize"},
};
// clang-format on
RegisterHandlers(functions);
}
void LoadNrr(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
LOG_WARNING(Service_LDR, "(STUBBED) called");
}
void LoadNro(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
rp.Skip(2, false);
const VAddr nro_addr{rp.Pop<VAddr>()};
const u64 nro_size{rp.Pop<u64>()};
const VAddr bss_addr{rp.Pop<VAddr>()};
const u64 bss_size{rp.Pop<u64>()};
// Read NRO data from memory
std::vector<u8> nro_data(nro_size);
Memory::ReadBlock(nro_addr, nro_data.data(), nro_size);
// Load NRO as new executable module
const VAddr addr{*Core::CurrentProcess()->VMManager().FindFreeRegion(nro_size + bss_size)};
Loader::AppLoader_NRO::LoadNro(nro_data, fmt::format("nro-{:08x}", addr), addr);
// TODO(bunnei): This is an incomplete implementation. It was tested with Super Mario Party.
// It is currently missing:
// - Signature checks with LoadNRR
// - Checking if a module has already been loaded
// - Using/validating BSS, etc. params (these are used from NRO header instead)
// - Error checking
// - ...Probably other things
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push(addr);
LOG_WARNING(Service_LDR, "(STUBBED) called");
}
void Initialize(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
LOG_WARNING(Service_LDR, "(STUBBED) called");
}
};
void InstallInterfaces(SM::ServiceManager& sm) {


@@ -328,13 +328,15 @@ void Module::Interface::CreateUserInterface(Kernel::HLERequestContext& ctx) {
rb.PushIpcInterface<IUser>(*this);
}
void Module::Interface::LoadAmiibo(const std::vector<u8>& buffer) {
bool Module::Interface::LoadAmiibo(const std::vector<u8>& buffer) {
std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
if (buffer.size() < sizeof(AmiiboFile)) {
return; // Failed to load file
return false;
}
std::memcpy(&amiibo, buffer.data(), sizeof(amiibo));
nfc_tag_load->Signal();
return true;
}
const Kernel::SharedPtr<Kernel::Event>& Module::Interface::GetNFCEvent() const {
return nfc_tag_load;


@@ -32,7 +32,7 @@ public:
static_assert(sizeof(AmiiboFile) == 0x94, "AmiiboFile is an invalid size");
void CreateUserInterface(Kernel::HLERequestContext& ctx);
void LoadAmiibo(const std::vector<u8>& buffer);
bool LoadAmiibo(const std::vector<u8>& buffer);
const Kernel::SharedPtr<Kernel::Event>& GetNFCEvent() const;
const AmiiboFile& GetAmiiboBuffer() const;


@@ -31,7 +31,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
buffer_wait_event->Signal();
}
boost::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) {
std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) {
auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
// Only consider free buffers. Buffers become free once again after they've been Acquired
// and Released by the compositor, see the NVFlinger::Compose method.
@@ -44,7 +44,7 @@ boost::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) {
});
if (itr == queue.end()) {
return boost::none;
return {};
}
itr->status = Buffer::Status::Dequeued;
@@ -70,12 +70,12 @@ void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
itr->crop_rect = crop_rect;
}
boost::optional<const BufferQueue::Buffer&> BufferQueue::AcquireBuffer() {
std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
auto itr = std::find_if(queue.begin(), queue.end(), [](const Buffer& buffer) {
return buffer.status == Buffer::Status::Queued;
});
if (itr == queue.end())
return boost::none;
return {};
itr->status = Buffer::Status::Acquired;
return *itr;
}
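
std::optional cannot hold plain references, which is why AcquireBuffer above now returns std::optional<std::reference_wrapper<const Buffer>> and callers go through .get() (see the NVFlinger changes below). A minimal standalone sketch of that pattern (illustrative Buffer struct, not the service type):

#include <functional>
#include <optional>
#include <vector>

struct Buffer {
    int slot;
    bool queued;
};

// Returns a reference to the first queued buffer, or std::nullopt if none is queued.
std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(const std::vector<Buffer>& queue) {
    for (const auto& buffer : queue) {
        if (buffer.queued) {
            return buffer; // implicitly wrapped into a reference_wrapper, like `return *itr;` above
        }
    }
    return std::nullopt;
}

int main() {
    const std::vector<Buffer> queue{{0, false}, {1, true}};
    const auto buffer = AcquireBuffer(queue);
    // Callers dereference through get(), e.g. buffer->get().slot, as NVFlinger::Compose now does.
    return buffer ? buffer->get().slot : -1;
}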


@@ -4,8 +4,9 @@
#pragma once
#include <optional>
#include <vector>
#include <boost/optional.hpp>
#include "common/common_funcs.h"
#include "common/math_util.h"
#include "common/swap.h"
@@ -57,9 +58,9 @@ public:
/// Rotate source image 90 degrees clockwise
Rotate90 = 0x04,
/// Rotate source image 180 degrees
Roate180 = 0x03,
Rotate180 = 0x03,
/// Rotate source image 270 degrees clockwise
Roate270 = 0x07,
Rotate270 = 0x07,
};
struct Buffer {
@@ -73,11 +74,11 @@ public:
};
void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
boost::optional<u32> DequeueBuffer(u32 width, u32 height);
std::optional<u32> DequeueBuffer(u32 width, u32 height);
const IGBPBuffer& RequestBuffer(u32 slot) const;
void QueueBuffer(u32 slot, BufferTransformFlags transform,
const MathUtil::Rectangle<int>& crop_rect);
boost::optional<const Buffer&> AcquireBuffer();
std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
void ReleaseBuffer(u32 slot);
u32 Query(QueryType type);


@@ -3,7 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <boost/optional.hpp>
#include <optional>
#include "common/alignment.h"
#include "common/assert.h"
@@ -134,7 +134,7 @@ void NVFlinger::Compose() {
MicroProfileFlip();
if (buffer == boost::none) {
if (!buffer) {
auto& system_instance = Core::System::GetInstance();
// There was no queued buffer to draw, render previous frame
@@ -143,7 +143,7 @@ void NVFlinger::Compose() {
continue;
}
auto& igbp_buffer = buffer->igbp_buffer;
auto& igbp_buffer = buffer->get().igbp_buffer;
// Now send the buffer to the GPU for drawing.
// TODO(Subv): Support more than just disp0. The display device selection is probably based
@@ -152,10 +152,10 @@ void NVFlinger::Compose() {
ASSERT(nvdisp);
nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format,
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, buffer->transform,
buffer->crop_rect);
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
buffer->get().transform, buffer->get().crop_rect);
buffer_queue->ReleaseBuffer(buffer->slot);
buffer_queue->ReleaseBuffer(buffer->get().slot);
}
}


@@ -132,11 +132,11 @@ public:
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "BindNoticeEvent"},
{1, nullptr, "Unknown1"},
{1, nullptr, "UnbindNoticeEvent"},
{2, nullptr, "GetStatus"},
{3, nullptr, "GetNotice"},
{4, nullptr, "Unknown2"},
{5, nullptr, "Unknown3"},
{4, nullptr, "EnablePowerRequestNotice"},
{5, nullptr, "DisablePowerRequestNotice"},
{6, nullptr, "ReplyPowerRequest"},
};
// clang-format on


@@ -6,9 +6,10 @@
#include <array>
#include <cstring>
#include <memory>
#include <optional>
#include <type_traits>
#include <utility>
#include <boost/optional.hpp>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_funcs.h"
@@ -506,9 +507,9 @@ private:
IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
const u32 width{request.data.width};
const u32 height{request.data.height};
boost::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
if (slot != boost::none) {
if (slot) {
// Buffer is available
IGBPDequeueBufferResponseParcel response{*slot};
ctx.WriteBuffer(response.Serialize());
@@ -520,7 +521,7 @@ private:
Kernel::ThreadWakeupReason reason) {
// Repeat TransactParcel DequeueBuffer when a buffer is available
auto buffer_queue = nv_flinger->GetBufferQueue(id);
boost::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
IGBPDequeueBufferResponseParcel response{*slot};
ctx.WriteBuffer(response.Serialize());
IPC::ResponseBuilder rb{ctx, 2};


@@ -6,10 +6,11 @@
#include <iosfwd>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>
#include <boost/optional.hpp>
#include "common/common_types.h"
#include "core/file_sys/vfs.h"
@@ -145,7 +146,7 @@ public:
* information.
* @returns A pair with the optional system mode, and the status.
*/
virtual std::pair<boost::optional<u32>, ResultStatus> LoadKernelSystemMode() {
virtual std::pair<std::optional<u32>, ResultStatus> LoadKernelSystemMode() {
// 96MB allocated to the application.
return std::make_pair(2, ResultStatus::Success);
}


@@ -127,18 +127,23 @@ static constexpr u32 PageAlignSize(u32 size) {
return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
}
bool AppLoader_NRO::LoadNro(const FileSys::VfsFile& file, VAddr load_base) {
// Read NSO header
NroHeader nro_header{};
if (sizeof(NroHeader) != file.ReadObject(&nro_header)) {
/*static*/ bool AppLoader_NRO::LoadNro(const std::vector<u8>& data, const std::string& name,
VAddr load_base) {
if (data.size() < sizeof(NroHeader)) {
return {};
}
// Read NSO header
NroHeader nro_header{};
std::memcpy(&nro_header, data.data(), sizeof(NroHeader));
if (nro_header.magic != Common::MakeMagic('N', 'R', 'O', '0')) {
return {};
}
// Build program image
std::vector<u8> program_image = file.ReadBytes(PageAlignSize(nro_header.file_size));
std::vector<u8> program_image(PageAlignSize(nro_header.file_size));
std::memcpy(program_image.data(), data.data(), program_image.size());
if (program_image.size() != PageAlignSize(nro_header.file_size)) {
return {};
}
@@ -182,11 +187,15 @@ bool AppLoader_NRO::LoadNro(const FileSys::VfsFile& file, VAddr load_base) {
Core::CurrentProcess()->LoadModule(std::move(codeset), load_base);
// Register module with GDBStub
GDBStub::RegisterModule(file.GetName(), load_base, load_base);
GDBStub::RegisterModule(name, load_base, load_base);
return true;
}
bool AppLoader_NRO::LoadNro(const FileSys::VfsFile& file, VAddr load_base) {
return AppLoader_NRO::LoadNro(file.ReadAllBytes(), file.GetName(), load_base);
}
ResultStatus AppLoader_NRO::Load(Kernel::Process& process) {
if (is_loaded) {
return ResultStatus::ErrorAlreadyLoaded;
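
The loader split above keeps one buffer-based core entry point (so ldr:ro can feed it data read from guest memory) plus a thin file-based convenience overload that reads the file and forwards. A standalone sketch of that shape (illustrative names; the real header parsing and segment mapping are omitted):

#include <cstdint>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>

// Core entry point: operates on an in-memory image identified only by a name.
bool LoadImage(const std::vector<std::uint8_t>& data, const std::string& name) {
    // ... parse header, build the program image, register `name` with the debugger ...
    return !data.empty();
}

// Convenience overload: read a file into memory and forward to the core path.
bool LoadImage(const std::string& path) {
    std::ifstream file(path, std::ios::binary);
    const std::vector<std::uint8_t> data((std::istreambuf_iterator<char>(file)),
                                         std::istreambuf_iterator<char>());
    return LoadImage(data, path);
}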


@@ -5,6 +5,7 @@
#pragma once
#include <string>
#include <vector>
#include "common/common_types.h"
#include "core/loader/linker.h"
#include "core/loader/loader.h"
@@ -40,6 +41,8 @@ public:
ResultStatus ReadTitle(std::string& title) override;
bool IsRomFSUpdatable() const override;
static bool LoadNro(const std::vector<u8>& data, const std::string& name, VAddr load_base);
private:
bool LoadNro(const FileSys::VfsFile& file, VAddr load_base);


@@ -36,6 +36,16 @@ AppLoader_NSP::AppLoader_NSP(FileSys::VirtualFile file)
std::tie(nacp_file, icon_file) =
FileSys::PatchManager(nsp->GetProgramTitleID()).ParseControlNCA(*control_nca);
if (nsp->IsExtractedType()) {
secondary_loader = std::make_unique<AppLoader_DeconstructedRomDirectory>(nsp->GetExeFS());
} else {
if (title_id == 0)
return;
secondary_loader = std::make_unique<AppLoader_NCA>(
nsp->GetNCAFile(title_id, FileSys::ContentRecordType::Program));
}
}
AppLoader_NSP::~AppLoader_NSP() = default;
@@ -67,26 +77,19 @@ ResultStatus AppLoader_NSP::Load(Kernel::Process& process) {
return ResultStatus::ErrorAlreadyLoaded;
}
if (nsp->IsExtractedType()) {
secondary_loader = std::make_unique<AppLoader_DeconstructedRomDirectory>(nsp->GetExeFS());
} else {
if (title_id == 0)
return ResultStatus::ErrorNSPMissingProgramNCA;
if (title_id == 0)
return ResultStatus::ErrorNSPMissingProgramNCA;
secondary_loader = std::make_unique<AppLoader_NCA>(
nsp->GetNCAFile(title_id, FileSys::ContentRecordType::Program));
if (nsp->GetStatus() != ResultStatus::Success)
return nsp->GetStatus();
if (nsp->GetStatus() != ResultStatus::Success)
return nsp->GetStatus();
if (nsp->GetProgramStatus(title_id) != ResultStatus::Success)
return nsp->GetProgramStatus(title_id);
if (nsp->GetProgramStatus(title_id) != ResultStatus::Success)
return nsp->GetProgramStatus(title_id);
if (nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) {
if (!Core::Crypto::KeyManager::KeyFileExists(false))
return ResultStatus::ErrorMissingProductionKeyFile;
return ResultStatus::ErrorNSPMissingProgramNCA;
}
if (nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) {
if (!Core::Crypto::KeyManager::KeyFileExists(false))
return ResultStatus::ErrorMissingProductionKeyFile;
return ResultStatus::ErrorNSPMissingProgramNCA;
}
const auto result = secondary_loader->Load(process);


@@ -4,9 +4,9 @@
#include <algorithm>
#include <cstring>
#include <optional>
#include <utility>
#include <boost/optional.hpp>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"


@@ -5,7 +5,8 @@
#pragma once
#include <memory>
#include <boost/optional.hpp>
#include <optional>
#include "common/common_types.h"
namespace Memory {
@@ -18,19 +19,19 @@ namespace Memory {
*
* A hook may be mapped to multiple regions of memory.
*
* If a boost::none or false is returned from a function, the read/write request is passed through
* If a std::nullopt or false is returned from a function, the read/write request is passed through
* to the underlying memory region.
*/
class MemoryHook {
public:
virtual ~MemoryHook();
virtual boost::optional<bool> IsValidAddress(VAddr addr) = 0;
virtual std::optional<bool> IsValidAddress(VAddr addr) = 0;
virtual boost::optional<u8> Read8(VAddr addr) = 0;
virtual boost::optional<u16> Read16(VAddr addr) = 0;
virtual boost::optional<u32> Read32(VAddr addr) = 0;
virtual boost::optional<u64> Read64(VAddr addr) = 0;
virtual std::optional<u8> Read8(VAddr addr) = 0;
virtual std::optional<u16> Read16(VAddr addr) = 0;
virtual std::optional<u32> Read32(VAddr addr) = 0;
virtual std::optional<u64> Read64(VAddr addr) = 0;
virtual bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) = 0;
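
As the comment above describes, a hook that returns std::nullopt (or false) means "not handled, fall through to the underlying memory region". A minimal standalone sketch of a caller applying that rule (illustrative names, not the actual Memory API):

#include <cstdint>
#include <functional>
#include <optional>

// Stand-in for a hook's Read8: std::nullopt means "pass the read through".
using Read8Hook = std::function<std::optional<std::uint8_t>(std::uint64_t)>;

std::uint8_t Read8(std::uint64_t addr, const Read8Hook& hook,
                   const std::function<std::uint8_t(std::uint64_t)>& read_backing) {
    if (hook) {
        if (const auto hooked = hook(addr)) {
            return *hooked; // the hook handled the read
        }
    }
    return read_backing(addr); // fall through to the underlying memory region
}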


@@ -184,4 +184,13 @@ TelemetrySession::~TelemetrySession() {
backend = nullptr;
}
bool TelemetrySession::SubmitTestcase() {
#ifdef ENABLE_WEB_SERVICE
field_collection.Accept(*backend);
return backend->SubmitTestcase();
#else
return false;
#endif
}
} // namespace Core


@@ -31,6 +31,12 @@ public:
field_collection.AddField(type, name, std::move(value));
}
/**
* Submits a Testcase.
* @returns A bool indicating whether the submission succeeded
*/
bool SubmitTestcase();
private:
Telemetry::FieldCollection field_collection; ///< Tracks all added fields for the session
std::unique_ptr<Telemetry::VisitorInterface> backend; ///< Backend interface that logs fields


@@ -64,11 +64,11 @@ void TestEnvironment::ClearWriteRecords() {
TestEnvironment::TestMemory::~TestMemory() {}
boost::optional<bool> TestEnvironment::TestMemory::IsValidAddress(VAddr addr) {
std::optional<bool> TestEnvironment::TestMemory::IsValidAddress(VAddr addr) {
return true;
}
boost::optional<u8> TestEnvironment::TestMemory::Read8(VAddr addr) {
std::optional<u8> TestEnvironment::TestMemory::Read8(VAddr addr) {
const auto iter = data.find(addr);
if (iter == data.end()) {
@@ -79,15 +79,15 @@ boost::optional<u8> TestEnvironment::TestMemory::Read8(VAddr addr) {
return iter->second;
}
boost::optional<u16> TestEnvironment::TestMemory::Read16(VAddr addr) {
std::optional<u16> TestEnvironment::TestMemory::Read16(VAddr addr) {
return *Read8(addr) | static_cast<u16>(*Read8(addr + 1)) << 8;
}
boost::optional<u32> TestEnvironment::TestMemory::Read32(VAddr addr) {
std::optional<u32> TestEnvironment::TestMemory::Read32(VAddr addr) {
return *Read16(addr) | static_cast<u32>(*Read16(addr + 2)) << 16;
}
boost::optional<u64> TestEnvironment::TestMemory::Read64(VAddr addr) {
std::optional<u64> TestEnvironment::TestMemory::Read64(VAddr addr) {
return *Read32(addr) | static_cast<u64>(*Read32(addr + 4)) << 32;
}


@@ -64,12 +64,12 @@ private:
~TestMemory() override;
boost::optional<bool> IsValidAddress(VAddr addr) override;
std::optional<bool> IsValidAddress(VAddr addr) override;
boost::optional<u8> Read8(VAddr addr) override;
boost::optional<u16> Read16(VAddr addr) override;
boost::optional<u32> Read32(VAddr addr) override;
boost::optional<u64> Read64(VAddr addr) override;
std::optional<u8> Read8(VAddr addr) override;
std::optional<u16> Read16(VAddr addr) override;
std::optional<u32> Read32(VAddr addr) override;
std::optional<u64> Read64(VAddr addr) override;
bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) override;


@@ -33,6 +33,7 @@ add_library(video_core STATIC
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_rasterizer_cache.cpp
renderer_opengl/gl_rasterizer_cache.h
renderer_opengl/gl_resource_manager.cpp
renderer_opengl/gl_resource_manager.h
renderer_opengl/gl_shader_cache.cpp
renderer_opengl/gl_shader_cache.h
@@ -51,6 +52,10 @@ add_library(video_core STATIC
renderer_opengl/maxwell_to_gl.h
renderer_opengl/renderer_opengl.cpp
renderer_opengl/renderer_opengl.h
renderer_opengl/utils.cpp
renderer_opengl/utils.h
surface.cpp
surface.h
textures/astc.cpp
textures/astc.h
textures/decoders.cpp


@@ -81,7 +81,7 @@ void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
for (auto entry : commands) {
Tegra::GPUVAddr address = entry.Address();
u32 size = entry.sz;
const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
const std::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
VAddr current_addr = *head_address;
while (current_addr < *head_address + size * sizeof(CommandHeader)) {
const CommandHeader header = {Memory::Read32(current_addr)};


@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <cinttypes>
#include <cstring>
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
@@ -19,21 +20,56 @@ namespace Tegra::Engines {
constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
: memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {}
: memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {
InitializeRegisterDefaults();
}
void Maxwell3D::InitializeRegisterDefaults() {
// Initializes registers to their default values - what games expect them to be at boot. This is
// for certain registers that may not be explicitly set by games.
// Reset all registers to zero
std::memset(&regs, 0, sizeof(regs));
// Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is
// needed for ARMS.
for (std::size_t viewport{}; viewport < Regs::NumViewports; ++viewport) {
regs.viewport[viewport].depth_range_near = 0.0f;
regs.viewport[viewport].depth_range_far = 1.0f;
}
// Doom and Bomberman seem to use the uninitialized registers and just enable blend,
// so initialize the blend registers with sane values
regs.blend.equation_rgb = Regs::Blend::Equation::Add;
regs.blend.factor_source_rgb = Regs::Blend::Factor::One;
regs.blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
regs.blend.equation_a = Regs::Blend::Equation::Add;
regs.blend.factor_source_a = Regs::Blend::Factor::One;
regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) {
regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add;
regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One;
regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero;
regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add;
regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One;
regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero;
}
}
void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
// Reset the current macro.
executing_macro = 0;
// The requested macro must have been uploaded already.
auto macro_code = uploaded_macros.find(method);
if (macro_code == uploaded_macros.end()) {
LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method);
// Lookup the macro offset
const u32 entry{(method - MacroRegistersStart) >> 1};
const auto& search{macro_offsets.find(entry)};
if (search == macro_offsets.end()) {
LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
UNREACHABLE();
return;
}
// Execute the current macro.
macro_interpreter.Execute(macro_code->second, std::move(parameters));
macro_interpreter.Execute(search->second, std::move(parameters));
}
void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
@@ -79,6 +115,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
ProcessMacroUpload(value);
break;
}
case MAXWELL3D_REG_INDEX(macros.bind): {
ProcessMacroBind(value);
break;
}
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
@@ -140,16 +180,20 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
}
void Maxwell3D::ProcessMacroUpload(u32 data) {
// Store the uploaded macro code to interpret them when they're called.
auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart];
macro.push_back(data);
ASSERT_MSG(regs.macros.upload_address < macro_memory.size(),
"upload_address exceeded macro_memory size!");
macro_memory[regs.macros.upload_address++] = data;
}
void Maxwell3D::ProcessMacroBind(u32 data) {
macro_offsets[regs.macros.entry] = data;
}
void Maxwell3D::ProcessQueryGet() {
GPUVAddr sequence_address = regs.query.QueryAddress();
// Since the sequence address is given as a GPU VAddr, we have to convert it to an application
// VAddr before writing.
boost::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
std::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
// TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -267,7 +311,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
// Don't allow writing past the end of the buffer.
ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
boost::optional<VAddr> address =
std::optional<VAddr> address =
memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
Memory::Write32(*address, value);
@@ -280,7 +324,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
GPUVAddr tic_base_address = regs.tic.TICAddress();
GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
boost::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
std::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
Texture::TICEntry tic_entry;
Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -304,7 +348,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
boost::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
std::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
Texture::TSCEntry tsc_entry;
Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
@@ -368,7 +412,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
boost::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
std::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
Texture::FullTextureInfo tex_info{};
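
The macro handling above replaces a per-method map of code vectors with a single flat code block plus a table of start offsets: uploads append words, binds record where each macro entry starts, and execution resolves an entry to its offset. A minimal standalone sketch of that layout (illustrative class, not the Maxwell3D engine itself):

#include <array>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <unordered_map>

class MacroStore {
public:
    // Mirrors ProcessMacroUpload: code words go sequentially into one flat block.
    void Upload(std::uint32_t word) {
        memory.at(upload_address++) = word;
    }

    // Mirrors ProcessMacroBind: remember where macro `entry` starts in the block.
    void Bind(std::uint32_t entry, std::uint32_t offset) {
        offsets[entry] = offset;
    }

    // Mirrors the lookup in CallMacroMethod, where the engine derives
    // entry = (method - MacroRegistersStart) >> 1 before resolving the offset.
    std::optional<std::uint32_t> StartOffset(std::uint32_t entry) const {
        const auto it = offsets.find(entry);
        if (it == offsets.end()) {
            return std::nullopt;
        }
        return it->second;
    }

private:
    std::array<std::uint32_t, 0x40000> memory{}; // same size as MacroMemory above
    std::size_t upload_address = 0;
    std::unordered_map<std::uint32_t, std::uint32_t> offsets;
};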


@@ -462,6 +462,16 @@ public:
}
};
struct ColorMask {
union {
u32 raw;
BitField<0, 4, u32> R;
BitField<4, 4, u32> G;
BitField<8, 4, u32> B;
BitField<12, 4, u32> A;
};
};
bool IsShaderConfigEnabled(std::size_t index) const {
// The VertexB is always enabled.
if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
@@ -475,12 +485,13 @@ public:
INSERT_PADDING_WORDS(0x45);
struct {
INSERT_PADDING_WORDS(1);
u32 upload_address;
u32 data;
u32 entry;
u32 bind;
} macros;
INSERT_PADDING_WORDS(0x189);
INSERT_PADDING_WORDS(0x188);
u32 tfb_enabled;
@@ -570,7 +581,11 @@ public:
u32 stencil_back_mask;
u32 stencil_back_func_mask;
INSERT_PADDING_WORDS(0x13);
INSERT_PADDING_WORDS(0xC);
u32 color_mask_common;
INSERT_PADDING_WORDS(0x6);
u32 rt_separate_frag_data;
@@ -645,8 +660,14 @@ public:
ComparisonOp depth_test_func;
float alpha_test_ref;
ComparisonOp alpha_test_func;
INSERT_PADDING_WORDS(0x9);
u32 draw_tfb_stride;
struct {
float r;
float g;
float b;
float a;
} blend_color;
INSERT_PADDING_WORDS(0x4);
struct {
u32 separate_alpha;
@@ -723,7 +744,11 @@ public:
StencilOp stencil_back_op_zpass;
ComparisonOp stencil_back_func_func;
INSERT_PADDING_WORDS(0x17);
INSERT_PADDING_WORDS(0x4);
u32 framebuffer_srgb;
INSERT_PADDING_WORDS(0x12);
union {
BitField<2, 1, u32> coord_origin;
@@ -751,7 +776,14 @@ public:
};
} draw;
INSERT_PADDING_WORDS(0x6B);
INSERT_PADDING_WORDS(0xA);
struct {
u32 enabled;
u32 index;
} primitive_restart;
INSERT_PADDING_WORDS(0x5F);
struct {
u32 start_addr_high;
@@ -829,8 +861,9 @@ public:
BitField<6, 4, u32> RT;
BitField<10, 11, u32> layer;
} clear_buffers;
INSERT_PADDING_WORDS(0x4B);
INSERT_PADDING_WORDS(0xB);
std::array<ColorMask, NumRenderTargets> color_mask;
INSERT_PADDING_WORDS(0x38);
struct {
u32 query_address_high;
@@ -983,10 +1016,25 @@ public:
/// Returns the texture information for a specific texture in a specific shader stage.
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
/// Memory for macro code - it's undetermined how big this is, but 1MB is much larger than
/// anything we've seen used.
using MacroMemory = std::array<u32, 0x40000>;
/// Gets a reference to macro memory.
const MacroMemory& GetMacroMemory() const {
return macro_memory;
}
private:
void InitializeRegisterDefaults();
VideoCore::RasterizerInterface& rasterizer;
std::unordered_map<u32, std::vector<u32>> uploaded_macros;
/// Start offsets of each macro in macro_memory
std::unordered_map<u32, u32> macro_offsets;
/// Memory for macro code
MacroMemory macro_memory;
/// Macro method that is currently being executed / being fed parameters.
u32 executing_macro = 0;
@@ -1009,9 +1057,12 @@ private:
*/
void CallMacroMethod(u32 method, std::vector<u32> parameters);
/// Handles writes to the macro uploading registers.
/// Handles writes to the macro uploading register.
void ProcessMacroUpload(u32 data);
/// Handles writes to the macro bind register.
void ProcessMacroBind(u32 data);
/// Handles a write to the CLEAR_BUFFERS register.
void ProcessClearBuffers();
@@ -1045,6 +1096,7 @@ ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1057,6 +1109,10 @@ ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
ASSERT_REG_POSITION(alpha_test_enabled, 0x4BB);
ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
ASSERT_REG_POSITION(depth_test_func, 0x4C3);
ASSERT_REG_POSITION(alpha_test_ref, 0x4C4);
ASSERT_REG_POSITION(alpha_test_func, 0x4C5);
ASSERT_REG_POSITION(draw_tfb_stride, 0x4C6);
ASSERT_REG_POSITION(blend_color, 0x4C7);
ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(stencil_enable, 0x4E0);
ASSERT_REG_POSITION(stencil_front_op_fail, 0x4E1);
@@ -1077,14 +1133,17 @@ ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567);
ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
ASSERT_REG_POSITION(point_coord_replace, 0x581);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
ASSERT_REG_POSITION(color_mask, 0x680);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(independent_blend, 0x780);
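The maxwell_3d.h hunks above replace per-macro std::vector<u32> storage with a single MacroMemory array plus a map of start offsets, so uploaded macro code lives in one flat buffer. A minimal sketch of that bookkeeping in isolation, assuming simplified names (MacroStore, UploadWord, BindMethod and Fetch are illustrative, not the engine's actual interface):

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>

    using u32 = std::uint32_t;

    // Simplified stand-in for the engine's macro storage: one flat code buffer
    // plus a table mapping macro methods to their start offset in that buffer.
    class MacroStore {
    public:
        static constexpr std::size_t MaxWords = 0x40000; // 1 MB of u32 words

        // Append one uploaded word of macro code at the current upload position.
        void UploadWord(u32 word) {
            memory.at(upload_pos++) = word;
        }

        // Remember where a macro method's code begins inside the flat buffer.
        void BindMethod(u32 method, u32 offset) {
            offsets[method] = offset;
        }

        // Fetch one instruction word for a bound macro, given the interpreter's
        // program counter in bytes (mirrors how GetOpcode indexes macro memory).
        u32 Fetch(u32 method, u32 pc_bytes) const {
            const u32 base = offsets.at(method);
            return memory.at(base + pc_bytes / sizeof(u32));
        }

    private:
        std::array<u32, MaxWords> memory{};
        std::unordered_map<u32, u32> offsets;
        std::size_t upload_pos = 0;
    };

The macro_interpreter changes further down consume exactly this shape: the interpreter keeps only an offset per macro instead of a copy of the code.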

View File

@@ -5,12 +5,11 @@
#pragma once
#include <bitset>
#include <optional>
#include <string>
#include <tuple>
#include <vector>
#include <boost/optional.hpp>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
@@ -208,6 +207,16 @@ enum class UniformType : u64 {
Double = 5,
};
enum class StoreType : u64 {
Unsigned8 = 0,
Signed8 = 1,
Unsigned16 = 2,
Signed16 = 3,
Bytes32 = 4,
Bytes64 = 5,
Bytes128 = 6,
};
enum class IMinMaxExchange : u64 {
None = 0,
XLo = 1,
@@ -568,6 +577,10 @@ union Instruction {
BitField<55, 1, u64> saturate;
} fmul32;
union {
BitField<52, 1, u64> generates_cc;
} op_32;
union {
BitField<48, 1, u64> is_signed;
} shift;
@@ -747,6 +760,18 @@ union Instruction {
BitField<44, 2, u64> unknown;
} ld_c;
union {
BitField<48, 3, StoreType> type;
} ldst_sl;
union {
BitField<44, 2, u64> unknown;
} ld_l;
union {
BitField<44, 2, u64> unknown;
} st_l;
union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
@@ -1209,6 +1234,8 @@ union Instruction {
BitField<61, 1, u64> is_b_imm;
BitField<60, 1, u64> is_b_gpr;
BitField<59, 1, u64> is_c_gpr;
BitField<20, 24, s64> smem_imm;
BitField<0, 5, ControlCode> flow_control_code;
Attribute attribute;
Sampler sampler;
@@ -1232,8 +1259,12 @@ public:
BRA,
PBK,
LD_A,
LD_L,
LD_S,
LD_C,
ST_A,
ST_L,
ST_S,
LDG, // Load from global memory
STG, // Store in global memory
TEX,
@@ -1429,7 +1460,7 @@ public:
Type type;
};
static boost::optional<const Matcher&> Decode(Instruction instr) {
static std::optional<std::reference_wrapper<const Matcher>> Decode(Instruction instr) {
static const auto table{GetDecodeTable()};
const auto matches_instruction = [instr](const auto& matcher) {
@@ -1437,7 +1468,8 @@ public:
};
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
return iter != table.end() ? boost::optional<const Matcher&>(*iter) : boost::none;
return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter)
: std::nullopt;
}
private:
@@ -1490,8 +1522,12 @@ private:
INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
@@ -1627,4 +1663,4 @@ private:
}
};
} // namespace Tegra::Shader
} // namespace Tegra::Shader
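Decode's return type changes from boost::optional<const Matcher&> to std::optional<std::reference_wrapper<const Matcher>>, because std::optional cannot hold a reference directly. A hedged, generic sketch of the same lookup pattern outside the shader code (Entry, Find and the table are illustrative names):

    #include <algorithm>
    #include <functional>
    #include <optional>
    #include <string>
    #include <vector>

    struct Entry {
        int id;
        std::string name;
    };

    // Returns a non-owning reference to the matching entry, or std::nullopt.
    // std::reference_wrapper stands in for the reference std::optional can't hold.
    std::optional<std::reference_wrapper<const Entry>> Find(const std::vector<Entry>& table,
                                                            int id) {
        const auto it = std::find_if(table.begin(), table.end(),
                                     [id](const Entry& e) { return e.id == id; });
        return it != table.end() ? std::optional<std::reference_wrapper<const Entry>>(*it)
                                 : std::nullopt;
    }

    // Callers unwrap with get() before touching members:
    //   if (const auto entry = Find(table, 3)) {
    //       const std::string& name = entry->get().name;
    //   }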

View File

@@ -96,6 +96,11 @@ struct Header {
}
} ps;
};
u64 GetLocalMemorySize() {
return (common1.shader_local_memory_low_size |
(common2.shader_local_memory_high_size << 24));
}
};
static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
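GetLocalMemorySize() above stitches the local memory size back together from a low field and a high field shifted by 24 bits. A small worked sketch of that split-field decode; the raw field values here are made-up examples, only the 24-bit shift comes from the header code:

    #include <cstdint>
    #include <iostream>

    int main() {
        // Assumed example register values, not real shader header contents.
        const std::uint64_t shader_local_memory_low_size = 0x1200;
        const std::uint64_t shader_local_memory_high_size = 0x1;

        // Full size is low | (high << 24), as in Header::GetLocalMemorySize().
        const std::uint64_t local_memory_size =
            shader_local_memory_low_size | (shader_local_memory_high_size << 24);

        std::cout << std::hex << local_memory_size << '\n'; // prints 1001200
        return 0;
    }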

View File

@@ -11,7 +11,7 @@ namespace Tegra {
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) {
void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
Reset();
registers[1] = parameters[0];
this->parameters = std::move(parameters);
@@ -19,7 +19,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa
// Execute the code until we hit an exit condition.
bool keep_executing = true;
while (keep_executing) {
keep_executing = Step(code, false);
keep_executing = Step(offset, false);
}
// Assert that the macro used all of the input parameters
@@ -29,7 +29,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa
void MacroInterpreter::Reset() {
registers = {};
pc = 0;
delayed_pc = boost::none;
delayed_pc = {};
method_address.raw = 0;
parameters.clear();
// The next parameter index starts at 1, because $r1 already has the value of the first
@@ -37,17 +37,17 @@ void MacroInterpreter::Reset() {
next_parameter_index = 1;
}
bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
u32 base_address = pc;
Opcode opcode = GetOpcode(code);
Opcode opcode = GetOpcode(offset);
pc += 4;
// Update the program counter if we were delayed
if (delayed_pc != boost::none) {
if (delayed_pc) {
ASSERT(is_delay_slot);
pc = *delayed_pc;
delayed_pc = boost::none;
delayed_pc = {};
}
switch (opcode.operation) {
@@ -108,7 +108,7 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
delayed_pc = base_address + opcode.GetBranchTarget();
// Execute one more instruction due to the delay slot.
return Step(code, true);
return Step(offset, true);
}
break;
}
@@ -121,17 +121,18 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
// Exit has a delay slot, execute the next instruction
// Note: Executing an exit during a branch delay slot will cause the instruction at the
// branch target to be executed before exiting.
Step(code, true);
Step(offset, true);
return false;
}
return true;
}
MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const {
MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const {
const auto& macro_memory{maxwell3d.GetMacroMemory()};
ASSERT((pc % sizeof(u32)) == 0);
ASSERT(pc < code.size() * sizeof(u32));
return {code[pc / sizeof(u32)]};
ASSERT((pc + offset) < macro_memory.size() * sizeof(u32));
return {macro_memory[offset + pc / sizeof(u32)]};
}
u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const {

View File

@@ -5,8 +5,9 @@
#pragma once
#include <array>
#include <optional>
#include <vector>
#include <boost/optional.hpp>
#include "common/bit_field.h"
#include "common/common_types.h"
@@ -21,10 +22,10 @@ public:
/**
* Executes the macro code with the specified input parameters.
* @param code The macro byte code to execute
* @param parameters The parameters of the macro
* @param offset Offset to start execution at.
* @param parameters The parameters of the macro.
*/
void Execute(const std::vector<u32>& code, std::vector<u32> parameters);
void Execute(u32 offset, std::vector<u32> parameters);
private:
enum class Operation : u32 {
@@ -109,11 +110,11 @@ private:
/**
* Executes a single macro instruction located at the current program counter. Returns whether
* the interpreter should keep running.
* @param code The macro code to execute.
* @param offset Offset to start execution at.
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a
* previous instruction.
*/
bool Step(const std::vector<u32>& code, bool is_delay_slot);
bool Step(u32 offset, bool is_delay_slot);
/// Calculates the result of an ALU operation. src_a OP src_b;
u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const;
@@ -126,7 +127,7 @@ private:
bool EvaluateBranchCondition(BranchCondition cond, u32 value) const;
/// Reads an opcode at the current program counter location.
Opcode GetOpcode(const std::vector<u32>& code) const;
Opcode GetOpcode(u32 offset) const;
/// Returns the specified register's value. Register 0 is hardcoded to always return 0.
u32 GetRegister(u32 register_id) const;
@@ -149,7 +150,7 @@ private:
Engines::Maxwell3D& maxwell3d;
u32 pc; ///< Current program counter
boost::optional<u32>
std::optional<u32>
delayed_pc; ///< Program counter to execute at after the delay slot is executed.
static constexpr std::size_t NumMacroRegisters = 8;
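delayed_pc moves from boost::optional to std::optional, so the comparisons against boost::none become plain truthiness checks and assigning {} clears the value. A minimal sketch of that idiom, assuming a cut-down model of the interpreter (DelaySlotExample, Branch and Step are illustrative names):

    #include <cstdint>
    #include <optional>

    struct DelaySlotExample {
        std::uint32_t pc = 0;
        std::optional<std::uint32_t> delayed_pc;

        // A branch records its target; it takes effect after one more instruction.
        void Branch(std::uint32_t target) {
            delayed_pc = target;
        }

        void Step() {
            pc += 4;
            if (delayed_pc) {    // replaces `delayed_pc != boost::none`
                pc = *delayed_pc;
                delayed_pc = {}; // replaces `delayed_pc = boost::none`
            }
        }
    };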

View File

@@ -4,18 +4,21 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/memory_manager.h"
namespace Tegra {
GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, align);
ASSERT(gpu_addr);
const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
VAddr& slot = PageSlot(*gpu_addr + offset);
ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(*gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
slot = static_cast<u64>(PageStatus::Allocated);
}
@@ -23,10 +26,11 @@ GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
}
GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
VAddr& slot = PageSlot(gpu_addr + offset);
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
slot = static_cast<u64>(PageStatus::Allocated);
}
@@ -34,17 +38,19 @@ GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
}
GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, PAGE_SIZE);
ASSERT(gpu_addr);
const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)};
for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
VAddr& slot = PageSlot(*gpu_addr + offset);
ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(*gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
slot = cpu_addr + offset;
}
MappedRegion region{cpu_addr, *gpu_addr, size};
const MappedRegion region{cpu_addr, *gpu_addr, size};
mapped_regions.push_back(region);
return *gpu_addr;
@@ -53,14 +59,31 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
ASSERT((gpu_addr & PAGE_MASK) == 0);
for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
VAddr& slot = PageSlot(gpu_addr + offset);
if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) {
// The page has already been mapped. In this case, we must find a new area of memory to use that
// is different from the specified one. Super Mario Odyssey hits this scenario when changing
// areas, but we do not want to overwrite the old pages.
// TODO(bunnei): We need to write a hardware test to confirm this behavior.
LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr);
const std::optional<GPUVAddr> new_gpu_addr{
FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)};
ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory");
gpu_addr = *new_gpu_addr;
}
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Allocated));
slot = cpu_addr + offset;
}
MappedRegion region{cpu_addr, gpu_addr, size};
const MappedRegion region{cpu_addr, gpu_addr, size};
mapped_regions.push_back(region);
return gpu_addr;
@@ -69,11 +92,12 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)
GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
ASSERT((gpu_addr & PAGE_MASK) == 0);
for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
VAddr& slot = PageSlot(gpu_addr + offset);
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(gpu_addr + offset)};
ASSERT(slot != static_cast<u64>(PageStatus::Allocated) &&
slot != static_cast<u64>(PageStatus::Unmapped));
slot = static_cast<u64>(PageStatus::Unmapped);
}
@@ -97,13 +121,14 @@ GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const {
return {};
}
boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
GPUVAddr gpu_addr = 0;
u64 free_space = 0;
std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
PageStatus status) {
GPUVAddr gpu_addr{region_start};
u64 free_space{};
align = (align + PAGE_MASK) & ~PAGE_MASK;
while (gpu_addr + free_space < MAX_ADDRESS) {
if (!IsPageMapped(gpu_addr + free_space)) {
if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) {
free_space += PAGE_SIZE;
if (free_space >= size) {
return gpu_addr;
@@ -118,8 +143,8 @@ boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
return {};
}
boost::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
VAddr base_addr = PageSlot(gpu_addr);
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
const VAddr base_addr{PageSlot(gpu_addr)};
if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
base_addr == static_cast<u64>(PageStatus::Unmapped)) {
@@ -133,19 +158,15 @@ std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const {
std::vector<GPUVAddr> results;
for (const auto& region : mapped_regions) {
if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) {
u64 offset = cpu_addr - region.cpu_addr;
const u64 offset{cpu_addr - region.cpu_addr};
results.push_back(region.gpu_addr + offset);
}
}
return results;
}
bool MemoryManager::IsPageMapped(GPUVAddr gpu_addr) {
return PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Unmapped);
}
VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
auto& block = page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK];
auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]};
if (!block) {
block = std::make_unique<PageBlock>();
block->fill(static_cast<VAddr>(PageStatus::Unmapped));
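FindFreeBlock now takes a start address and the PageStatus to look for, which is what lets MapBufferEx search for an already Allocated run when the requested pages are taken. A simplified, self-contained sketch of that scan, assuming a flat page table and power-of-two alignment rather than the real two-level table (names other than PageStatus and PAGE_SIZE are illustrative):

    #include <cstdint>
    #include <optional>
    #include <vector>

    using u64 = std::uint64_t;
    using GPUVAddr = u64;

    enum class PageStatus : u64 {
        Unmapped = 0xFFFFFFFFFFFFFFFFULL,
        Allocated = 0xFFFFFFFFFFFFFFFEULL,
    };

    constexpr u64 PAGE_SIZE = 1ULL << 16;
    constexpr u64 PAGE_MASK = PAGE_SIZE - 1;

    // Scan for `size` contiguous bytes whose pages all carry the requested
    // status, starting at region_start. Returns the run's start address, if any.
    std::optional<GPUVAddr> FindFreeBlock(const std::vector<u64>& page_table,
                                          GPUVAddr region_start, u64 size, u64 align,
                                          PageStatus status) {
        align = (align + PAGE_MASK) & ~PAGE_MASK; // round alignment up to a page
        GPUVAddr gpu_addr = region_start;
        u64 free_space = 0;
        const u64 max_address = page_table.size() * PAGE_SIZE;

        while (gpu_addr + free_space < max_address) {
            if (page_table[(gpu_addr + free_space) / PAGE_SIZE] == static_cast<u64>(status)) {
                free_space += PAGE_SIZE;
                if (free_space >= size) {
                    return gpu_addr;
                }
            } else {
                // Restart past the mismatching page, keeping the (power-of-two)
                // alignment; the real advance logic is not visible in the diff.
                gpu_addr += free_space + PAGE_SIZE;
                free_space = 0;
                gpu_addr = (gpu_addr + align - 1) & ~(align - 1);
            }
        }
        return std::nullopt;
    }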

View File

@@ -6,10 +6,9 @@
#include <array>
#include <memory>
#include <optional>
#include <vector>
#include <boost/optional.hpp>
#include "common/common_types.h"
namespace Tegra {
@@ -27,7 +26,7 @@ public:
GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
boost::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;
static constexpr u64 PAGE_BITS = 16;
@@ -35,15 +34,15 @@ public:
static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
private:
boost::optional<GPUVAddr> FindFreeBlock(u64 size, u64 align = 1);
bool IsPageMapped(GPUVAddr gpu_addr);
VAddr& PageSlot(GPUVAddr gpu_addr);
enum class PageStatus : u64 {
Unmapped = 0xFFFFFFFFFFFFFFFFULL,
Allocated = 0xFFFFFFFFFFFFFFFEULL,
};
std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
PageStatus status);
VAddr& PageSlot(GPUVAddr gpu_addr);
static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
static constexpr u64 PAGE_TABLE_BITS{10};
static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};

View File

@@ -10,10 +10,8 @@
#include <boost/range/iterator_range_core.hpp>
#include "common/common_types.h"
#include "core/core.h"
#include "core/settings.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
class RasterizerCacheObject {
public:
@@ -64,6 +62,8 @@ class RasterizerCache : NonCopyable {
friend class RasterizerCacheObject;
public:
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
/// Write any cached resources overlapping the specified region back to memory
void FlushRegion(Tegra::GPUVAddr addr, size_t size) {
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -109,14 +109,12 @@ protected:
void Register(const T& object) {
object->SetIsRegistered(true);
object_cache.add({GetInterval(object), ObjectSet{object}});
auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
}
/// Unregisters an object from the cache
void Unregister(const T& object) {
object->SetIsRegistered(false);
auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
// Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
@@ -177,4 +175,5 @@ private:
ObjectCache object_cache; ///< Cache of objects
u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
VideoCore::RasterizerInterface& rasterizer;
};
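RasterizerCache now receives the rasterizer at construction instead of fetching it through Core::System::GetInstance(), the same dependency-injection move described in the rasterizer_cache commit of this series. A minimal sketch of that shape, assuming a cut-down interface (ExampleCache and the single virtual method stand in for the real classes):

    #include <cstdint>

    // Stand-in for VideoCore::RasterizerInterface, reduced to the one call the
    // cache needs.
    class RasterizerInterface {
    public:
        virtual ~RasterizerInterface() = default;
        virtual void UpdatePagesCachedCount(std::uint64_t addr, std::uint64_t size,
                                            int delta) = 0;
    };

    // Holding an explicit reference makes the dependency visible in the
    // constructor signature and keeps core includes out of the header.
    class ExampleCache {
    public:
        explicit ExampleCache(RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}

        void Register(std::uint64_t addr, std::uint64_t size) {
            rasterizer.UpdatePagesCachedCount(addr, size, 1);
        }

        void Unregister(std::uint64_t addr, std::uint64_t size) {
            rasterizer.UpdatePagesCachedCount(addr, size, -1);
        }

    private:
        RasterizerInterface& rasterizer;
    };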

View File

@@ -6,7 +6,8 @@
#include <atomic>
#include <memory>
#include <boost/optional.hpp>
#include <optional>
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
@@ -28,7 +29,8 @@ public:
virtual ~RendererBase();
/// Swap buffers (render frame)
virtual void SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) = 0;
virtual void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
/// Initialize the renderer
virtual bool Init() = 0;

View File

@@ -9,15 +9,17 @@
#include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
namespace OpenGL {
OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {}
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
: RasterizerCache{rasterizer}, stream_buffer(GL_ARRAY_BUFFER, size) {}
GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
std::size_t alignment, bool cache) {
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
// Cache management is a big overhead, so only cache entries above a given size.
// TODO: Figure out which size is the best for given games.

View File

@@ -15,6 +15,8 @@
namespace OpenGL {
class RasterizerOpenGL;
struct CachedBufferEntry final : public RasterizerCacheObject {
VAddr GetAddr() const override {
return addr;
@@ -35,7 +37,7 @@ struct CachedBufferEntry final : public RasterizerCacheObject {
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
public:
explicit OGLBufferCache(std::size_t size);
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size);
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
/// allocated.

View File

@@ -6,6 +6,7 @@
#include <array>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
@@ -45,7 +46,7 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
const u8* source{Memory::GetPointer(*cpu_addr)};
for (u32 primitive = 0; primitive < count / 4; ++primitive) {

View File

@@ -30,8 +30,8 @@
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = SurfaceParams::PixelFormat;
using SurfaceType = SurfaceParams::SurfaceType;
using PixelFormat = VideoCore::Surface::PixelFormat;
using SurfaceType = VideoCore::Surface::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
@@ -79,7 +79,8 @@ struct DrawParameters {
};
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
: emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
: res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE) {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -104,7 +105,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
}
ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported");
OpenGLState::ApplyDefaultState();
// Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
state.clip_distance[0] = true;
@@ -115,8 +116,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
state.draw.shader_program = 0;
state.Apply();
glEnable(GL_BLEND);
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
@@ -401,7 +400,7 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
bool preserve_contents,
boost::optional<std::size_t> single_color_target) {
std::optional<std::size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
@@ -418,6 +417,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
// Bind the framebuffer surfaces
state.draw.draw_framebuffer = framebuffer.handle;
state.Apply();
state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
if (using_color_fb) {
if (single_color_target) {
@@ -429,6 +429,9 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
color_surface->MarkAsModified(true, res_cache);
// Workaround for an issue in NVIDIA drivers
// https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion;
}
glFramebufferTexture2D(
@@ -446,6 +449,11 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
// Assume that a surface will be written to if it is used as a framebuffer, even
// if the shader doesn't actually write to it.
color_surface->MarkAsModified(true, res_cache);
// Enable sRGB only for supported formats
// Workaround for an issue in NVIDIA drivers
// https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
state.framebuffer_srgb.enabled |=
color_surface->GetSurfaceParams().srgb_conversion;
}
buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
@@ -504,10 +512,10 @@ void RasterizerOpenGL::Clear() {
OpenGLState clear_state;
clear_state.draw.draw_framebuffer = framebuffer.handle;
clear_state.color_mask.red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
clear_state.color_mask.green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
@@ -537,7 +545,9 @@ void RasterizerOpenGL::Clear() {
ConfigureFramebuffers(use_color, use_depth || use_stencil, false,
regs.clear_buffers.RT.Value());
// Copy the sRGB setting to the clear state to avoid problems with
// specific driver implementations
clear_state.framebuffer_srgb.enabled = state.framebuffer_srgb.enabled;
clear_state.Apply();
if (use_color) {
@@ -564,12 +574,13 @@ void RasterizerOpenGL::DrawArrays() {
ScopeAcquireGLContext acquire_context{emu_window};
ConfigureFramebuffers();
SyncColorMask();
SyncDepthTestState();
SyncStencilTestState();
SyncBlendState();
SyncLogicOpState();
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest();
// Alpha Testing is synced on shaders.
SyncTransformFeedback();
@@ -690,7 +701,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
// Verify that the cached surface is the same size and format as the requested framebuffer
const auto& params{surface->GetSurfaceParams()};
const auto& pixel_format{SurfaceParams::PixelFormatFromGPUPixelFormat(config.pixel_format)};
const auto& pixel_format{
VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different");
@@ -718,11 +730,15 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
if (mag_filter != config.mag_filter) {
mag_filter = config.mag_filter;
glSamplerParameteri(s, GL_TEXTURE_MAG_FILTER, MaxwellToGL::TextureFilterMode(mag_filter));
glSamplerParameteri(
s, GL_TEXTURE_MAG_FILTER,
MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None));
}
if (min_filter != config.min_filter) {
if (min_filter != config.min_filter || mip_filter != config.mip_filter) {
min_filter = config.min_filter;
glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, MaxwellToGL::TextureFilterMode(min_filter));
mip_filter = config.mip_filter;
glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER,
MaxwellToGL::TextureFilterMode(min_filter, mip_filter));
}
if (wrap_u != config.wrap_u) {
@@ -883,12 +899,16 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
void RasterizerOpenGL::SyncViewport() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
state.viewport.x = viewport_rect.left;
state.viewport.y = viewport_rect.bottom;
state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
auto& viewport = state.viewports[i];
viewport.x = viewport_rect.left;
viewport.y = viewport_rect.bottom;
viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
viewport.depth_range_far = regs.viewport[i].depth_range_far;
viewport.depth_range_near = regs.viewport[i].depth_range_near;
}
}
void RasterizerOpenGL::SyncClipEnabled() {
@@ -923,12 +943,11 @@ void RasterizerOpenGL::SyncCullMode() {
}
}
void RasterizerOpenGL::SyncDepthScale() {
UNREACHABLE();
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
void RasterizerOpenGL::SyncDepthOffset() {
UNREACHABLE();
state.primitive_restart.enabled = regs.primitive_restart.enabled;
state.primitive_restart.index = regs.primitive_restart.index;
}
void RasterizerOpenGL::SyncDepthTestState() {
@@ -971,26 +990,60 @@ void RasterizerOpenGL::SyncStencilTestState() {
state.stencil.back.write_mask = regs.stencil_back_mask;
}
void RasterizerOpenGL::SyncColorMask() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
const auto& source = regs.color_mask[regs.color_mask_common ? 0 : i];
auto& dest = state.color_mask[i];
dest.red_enabled = (source.R == 0) ? GL_FALSE : GL_TRUE;
dest.green_enabled = (source.G == 0) ? GL_FALSE : GL_TRUE;
dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
}
}
void RasterizerOpenGL::SyncBlendState() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
// TODO(Subv): Support more than just render target 0.
state.blend.enabled = regs.blend.enable[0] != 0;
state.blend_color.red = regs.blend_color.r;
state.blend_color.green = regs.blend_color.g;
state.blend_color.blue = regs.blend_color.b;
state.blend_color.alpha = regs.blend_color.a;
if (!state.blend.enabled)
state.independant_blend.enabled = regs.independent_blend_enable;
if (!state.independant_blend.enabled) {
auto& blend = state.blend[0];
blend.enabled = regs.blend.enable[0] != 0;
blend.separate_alpha = regs.blend.separate_alpha;
blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb);
blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb);
blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb);
if (blend.separate_alpha) {
blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a);
blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a);
blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a);
}
for (size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
state.blend[i].enabled = false;
}
return;
}
ASSERT_MSG(regs.logic_op.enable == 0,
"Blending and logic op can't be enabled at the same time.");
ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");
ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented");
state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb);
state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb);
state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_rgb);
state.blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_a);
state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_a);
state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_a);
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
auto& blend = state.blend[i];
blend.enabled = regs.blend.enable[i] != 0;
if (!blend.enabled)
continue;
blend.separate_alpha = regs.independent_blend[i].separate_alpha;
blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[i].equation_rgb);
blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_source_rgb);
blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_dest_rgb);
if (blend.separate_alpha) {
blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[i].equation_a);
blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_source_a);
blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_dest_a);
}
}
}
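The rewritten SyncBlendState walks every render target and fills a per-target entry, disabling the remaining targets when independent blending is off. A reduced sketch of that loop shape; BlendEntry, BlendRegs and SyncBlend are simplified stand-ins for the OpenGLState and Maxwell register structures, with only the enable and separate_alpha fields kept:

    #include <array>
    #include <cstddef>

    constexpr std::size_t NumRenderTargets = 8;

    struct BlendEntry {
        bool enabled = false;
        bool separate_alpha = false;
    };

    struct BlendRegs {
        bool independent_blend_enable = false;
        std::array<bool, NumRenderTargets> enable{};
        std::array<bool, NumRenderTargets> separate_alpha{};
    };

    // If independent blending is off, only target 0 carries real state and the
    // rest are disabled; otherwise each target is synced from its own registers.
    void SyncBlend(const BlendRegs& regs, std::array<BlendEntry, NumRenderTargets>& state) {
        if (!regs.independent_blend_enable) {
            state[0].enabled = regs.enable[0];
            state[0].separate_alpha = regs.separate_alpha[0];
            for (std::size_t i = 1; i < NumRenderTargets; ++i) {
                state[i].enabled = false;
            }
            return;
        }
        for (std::size_t i = 0; i < NumRenderTargets; ++i) {
            state[i].enabled = regs.enable[i];
            if (!state[i].enabled) {
                continue;
            }
            state[i].separate_alpha = regs.separate_alpha[i];
        }
    }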
void RasterizerOpenGL::SyncLogicOpState() {
@@ -1009,19 +1062,19 @@ void RasterizerOpenGL::SyncLogicOpState() {
}
void RasterizerOpenGL::SyncScissorTest() {
// TODO: what is the correct behavior here, a single scissor for all targets
// or scissor disabled for the rest of the targets?
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
state.scissor.enabled = (regs.scissor_test.enable != 0);
// TODO(Blinkhawk): Figure out whether the hardware supports scissor testing per viewport and how
// it's implemented.
if (regs.scissor_test.enable != 0) {
const u32 width = regs.scissor_test.max_x - regs.scissor_test.min_x;
const u32 height = regs.scissor_test.max_y - regs.scissor_test.min_y;
state.scissor.x = regs.scissor_test.min_x;
state.scissor.y = regs.scissor_test.min_y;
state.scissor.width = width;
state.scissor.height = height;
if (regs.scissor_test.enable == 0) {
return;
}
const u32 width = regs.scissor_test.max_x - regs.scissor_test.min_x;
const u32 height = regs.scissor_test.max_y - regs.scissor_test.min_y;
state.scissor.x = regs.scissor_test.min_x;
state.scissor.y = regs.scissor_test.min_y;
state.scissor.width = width;
state.scissor.height = height;
}
void RasterizerOpenGL::SyncTransformFeedback() {

View File

@@ -8,12 +8,12 @@
#include <cstddef>
#include <map>
#include <memory>
#include <optional>
#include <tuple>
#include <utility>
#include <vector>
#include <boost/icl/interval_map.hpp>
#include <boost/optional.hpp>
#include <boost/range/iterator_range.hpp>
#include <glad/glad.h>
@@ -93,6 +93,7 @@ private:
private:
Tegra::Texture::TextureFilter mag_filter;
Tegra::Texture::TextureFilter min_filter;
Tegra::Texture::TextureMipmapFilter mip_filter;
Tegra::Texture::WrapMode wrap_u;
Tegra::Texture::WrapMode wrap_v;
Tegra::Texture::WrapMode wrap_p;
@@ -110,7 +111,7 @@ private:
*/
void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true,
bool preserve_contents = true,
boost::optional<std::size_t> single_color_target = {});
std::optional<std::size_t> single_color_target = {});
/*
* Configures the current constbuffers to use for the draw command.
@@ -132,7 +133,7 @@ private:
u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit);
/// Syncs the viewport to match the guest state
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
/// Syncs the clip enabled status to match the guest state
@@ -144,11 +145,8 @@ private:
/// Syncs the cull mode to match the guest state
void SyncCullMode();
/// Syncs the depth scale to match the guest state
void SyncDepthScale();
/// Syncs the depth offset to match the guest state
void SyncDepthOffset();
/// Syncs the primitive restart to match the guest state
void SyncPrimitiveRestart();
/// Syncs the depth test state to match the guest state
void SyncDepthTestState();
@@ -171,6 +169,9 @@ private:
/// Syncs the point state to match the guest state
void SyncPointState();
/// Syncs Color Mask
void SyncColorMask();
/// Check asserts for alpha testing.
void CheckAlphaTests();

View File

@@ -15,16 +15,24 @@
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/surface.h"
#include "video_core/textures/astc.h"
#include "video_core/textures/decoders.h"
#include "video_core/utils.h"
namespace OpenGL {
using SurfaceType = SurfaceParams::SurfaceType;
using PixelFormat = SurfaceParams::PixelFormat;
using ComponentType = SurfaceParams::ComponentType;
using VideoCore::Surface::ComponentTypeFromDepthFormat;
using VideoCore::Surface::ComponentTypeFromRenderTarget;
using VideoCore::Surface::ComponentTypeFromTexture;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::PixelFormatFromTextureFormat;
using VideoCore::Surface::SurfaceTargetFromTextureType;
struct FormatTuple {
GLint internal_format;
@@ -34,34 +42,6 @@ struct FormatTuple {
bool compressed;
};
static bool IsPixelFormatASTC(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::ASTC_2D_5X4:
case PixelFormat::ASTC_2D_8X8:
case PixelFormat::ASTC_2D_8X5:
return true;
default:
return false;
}
}
static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
return {4, 4};
case PixelFormat::ASTC_2D_5X4:
return {5, 4};
case PixelFormat::ASTC_2D_8X8:
return {8, 8};
case PixelFormat::ASTC_2D_8X5:
return {8, 5};
default:
LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
UNREACHABLE();
}
}
void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
@@ -78,27 +58,34 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
}
}
std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
const u32 compression_factor{GetCompressionFactor(pixel_format)};
std::size_t SurfaceParams::InnerMipmapMemorySize(u32 mip_level, bool force_gl, bool layer_only,
bool uncompressed) const {
const u32 tile_x{GetDefaultBlockWidth(pixel_format)};
const u32 tile_y{GetDefaultBlockHeight(pixel_format)};
const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
u32 m_depth = (layer_only ? 1U : depth);
u32 m_width = std::max(1U, width / compression_factor);
u32 m_height = std::max(1U, height / compression_factor);
std::size_t size = Tegra::Texture::CalculateSize(is_tiled, bytes_per_pixel, m_width, m_height,
m_depth, block_height, block_depth);
u32 m_block_height = block_height;
u32 m_block_depth = block_depth;
std::size_t block_size_bytes = 512 * block_height * block_depth; // 512 is GOB size
for (u32 i = 1; i < max_mip_level; i++) {
m_width = std::max(1U, m_width / 2);
m_height = std::max(1U, m_height / 2);
m_depth = std::max(1U, m_depth / 2);
m_block_height = std::max(1U, m_block_height / 2);
m_block_depth = std::max(1U, m_block_depth / 2);
size += Tegra::Texture::CalculateSize(is_tiled, bytes_per_pixel, m_width, m_height, m_depth,
m_block_height, m_block_depth);
u32 m_width = MipWidth(mip_level);
u32 m_height = MipHeight(mip_level);
m_width = uncompressed ? m_width : std::max(1U, (m_width + tile_x - 1) / tile_x);
m_height = uncompressed ? m_height : std::max(1U, (m_height + tile_y - 1) / tile_y);
m_depth = std::max(1U, m_depth >> mip_level);
u32 m_block_height = MipBlockHeight(mip_level);
u32 m_block_depth = MipBlockDepth(mip_level);
return Tegra::Texture::CalculateSize(force_gl ? false : is_tiled, bytes_per_pixel, m_width,
m_height, m_depth, m_block_height, m_block_depth);
}
std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
bool uncompressed) const {
std::size_t block_size_bytes = Tegra::Texture::GetGOBSize() * block_height * block_depth;
std::size_t size = 0;
for (u32 i = 0; i < max_mip_level; i++) {
size += InnerMipmapMemorySize(i, force_gl, layer_only, uncompressed);
}
return is_tiled ? Common::AlignUp(size, block_size_bytes) : size;
if (!force_gl && is_tiled) {
size = Common::AlignUp(size, block_size_bytes);
}
return size;
}
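InnerMemorySize now accumulates InnerMipmapMemorySize over every mip level, halving the dimensions per level and rounding compressed formats up to whole tiles. A simplified, hedged sketch of that accumulation for a linear texture; MipmapSize and TotalSize are illustrative names, and the GOB alignment and swizzling handled by Tegra::Texture::CalculateSize are left out:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>

    // Size of one mip level, rounding compressed formats up to whole tiles.
    // tile_x/tile_y are the block dimensions (1x1 for uncompressed formats).
    std::size_t MipmapSize(std::uint32_t width, std::uint32_t height, std::uint32_t depth,
                           std::uint32_t level, std::uint32_t tile_x, std::uint32_t tile_y,
                           std::uint32_t bytes_per_block) {
        const std::uint32_t w = std::max(1u, width >> level);
        const std::uint32_t h = std::max(1u, height >> level);
        const std::uint32_t d = std::max(1u, depth >> level);
        const std::uint32_t blocks_x = (w + tile_x - 1) / tile_x;
        const std::uint32_t blocks_y = (h + tile_y - 1) / tile_y;
        return std::size_t{blocks_x} * blocks_y * d * bytes_per_block;
    }

    // Total size is the sum over every mip level, mirroring the loop above.
    std::size_t TotalSize(std::uint32_t width, std::uint32_t height, std::uint32_t depth,
                          std::uint32_t max_mip_level, std::uint32_t tile_x, std::uint32_t tile_y,
                          std::uint32_t bytes_per_block) {
        std::size_t size = 0;
        for (std::uint32_t level = 0; level < max_mip_level; ++level) {
            size += MipmapSize(width, height, depth, level, tile_x, tile_y, bytes_per_block);
        }
        return size;
    }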
/*static*/ SurfaceParams SurfaceParams::CreateForTexture(
@@ -108,8 +95,9 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
params.pixel_format =
PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value());
params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
params.srgb_conversion);
params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
params.type = GetFormatType(params.pixel_format);
params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
@@ -140,6 +128,13 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
params.target = SurfaceTarget::Texture2D;
}
break;
case SurfaceTarget::TextureCubeArray:
params.depth = config.tic.Depth() * 6;
if (!entry.IsArray()) {
ASSERT(params.depth == 6);
params.target = SurfaceTarget::TextureCubemap;
}
break;
default:
LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target));
UNREACHABLE();
@@ -166,6 +161,8 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
params.block_height = 1 << config.memory_layout.block_height;
params.block_depth = 1 << config.memory_layout.block_depth;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
params.width = config.width;
@@ -173,7 +170,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.max_mip_level = 0;
params.max_mip_level = 1;
params.is_layered = false;
// Render target specific parameters, not used for caching
@@ -201,12 +198,13 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
params.pixel_format = PixelFormatFromDepthFormat(format);
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
params.srgb_conversion = false;
params.width = zeta_width;
params.height = zeta_height;
params.unaligned_height = zeta_height;
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.max_mip_level = 0;
params.max_mip_level = 1;
params.is_layered = false;
params.rt = {};
@@ -224,6 +222,8 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
params.width = config.width;
@@ -231,7 +231,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.max_mip_level = 0;
params.max_mip_level = 1;
params.rt = {};
params.InitCacheParameters(config.Address());
@@ -239,7 +239,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
return params;
}
static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
{GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
@@ -255,7 +255,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
{GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT23
@@ -289,14 +289,31 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I
{GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
{GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
{GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm,
false}, // RGBA8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
{GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
{GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
// Compressed sRGB formats
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT1_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT23_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT45_SRGB
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8,
ComponentType::UNorm, true}, // BC7U_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
// Depth formats
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
@@ -312,20 +329,22 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
ComponentType::Float, false}, // Z32FS8
}};
static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) {
static GLenum SurfaceTargetToGL(SurfaceTarget target) {
switch (target) {
case SurfaceParams::SurfaceTarget::Texture1D:
case SurfaceTarget::Texture1D:
return GL_TEXTURE_1D;
case SurfaceParams::SurfaceTarget::Texture2D:
case SurfaceTarget::Texture2D:
return GL_TEXTURE_2D;
case SurfaceParams::SurfaceTarget::Texture3D:
case SurfaceTarget::Texture3D:
return GL_TEXTURE_3D;
case SurfaceParams::SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture1DArray:
return GL_TEXTURE_1D_ARRAY;
case SurfaceParams::SurfaceTarget::Texture2DArray:
case SurfaceTarget::Texture2DArray:
return GL_TEXTURE_2D_ARRAY;
case SurfaceParams::SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubemap:
return GL_TEXTURE_CUBE_MAP;
case SurfaceTarget::TextureCubeArray:
return GL_TEXTURE_CUBE_MAP_ARRAY_ARB;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target));
UNREACHABLE();
@@ -340,55 +359,41 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
return format;
}
MathUtil::Rectangle<u32> SurfaceParams::GetRect() const {
u32 actual_height{unaligned_height};
MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
if (IsPixelFormatASTC(pixel_format)) {
// ASTC formats must stop at the ASTC block size boundary
actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second);
}
return {0, actual_height, width, 0};
}
/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN
static bool IsFormatBCn(PixelFormat format) {
switch (format) {
case PixelFormat::DXT1:
case PixelFormat::DXT23:
case PixelFormat::DXT45:
case PixelFormat::DXN1:
case PixelFormat::DXN2SNORM:
case PixelFormat::DXN2UNORM:
case PixelFormat::BC7U:
case PixelFormat::BC6H_UF16:
case PixelFormat::BC6H_SF16:
return true;
}
return false;
return {0, actual_height, MipWidth(mip_level), 0};
}
template <bool morton_to_gl, PixelFormat format>
void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
std::size_t gl_buffer_size, VAddr addr) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetBytesPerPixel(format);
constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
// pixel values.
const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
const u32 tile_size_x{GetDefaultBlockWidth(format)};
const u32 tile_size_y{GetDefaultBlockHeight(format)};
if (morton_to_gl) {
const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth);
const std::vector<u8> data =
Tegra::Texture::UnswizzleTexture(addr, tile_size_x, tile_size_y, bytes_per_pixel,
stride, height, depth, block_height, block_depth);
const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
memcpy(gl_buffer, data.data(), size_to_copy);
} else {
Tegra::Texture::CopySwizzledData(stride / tile_size, height / tile_size, depth,
Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
(height + tile_size_y - 1) / tile_size_y, depth,
bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
gl_buffer, false, block_height, block_depth);
}
}
using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
SurfaceParams::MaxPixelFormat>;
VideoCore::Surface::MaxPixelFormat>;
static constexpr GLConversionArray morton_to_gl_fns = {
// clang-format off
@@ -432,7 +437,7 @@ static constexpr GLConversionArray morton_to_gl_fns = {
MortonCopy<true, PixelFormat::RG16I>,
MortonCopy<true, PixelFormat::RG16S>,
MortonCopy<true, PixelFormat::RGB32F>,
MortonCopy<true, PixelFormat::SRGBA8>,
MortonCopy<true, PixelFormat::RGBA8_SRGB>,
MortonCopy<true, PixelFormat::RG8U>,
MortonCopy<true, PixelFormat::RG8S>,
MortonCopy<true, PixelFormat::RG32UI>,
@@ -440,6 +445,17 @@ static constexpr GLConversionArray morton_to_gl_fns = {
MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
MortonCopy<true, PixelFormat::BGRA8_SRGB>,
MortonCopy<true, PixelFormat::DXT1_SRGB>,
MortonCopy<true, PixelFormat::DXT23_SRGB>,
MortonCopy<true, PixelFormat::DXT45_SRGB>,
MortonCopy<true, PixelFormat::BC7U_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
MortonCopy<true, PixelFormat::Z32F>,
MortonCopy<true, PixelFormat::Z16>,
MortonCopy<true, PixelFormat::Z24S8>,
@@ -491,7 +507,7 @@ static constexpr GLConversionArray gl_to_morton_fns = {
MortonCopy<false, PixelFormat::RG16I>,
MortonCopy<false, PixelFormat::RG16S>,
MortonCopy<false, PixelFormat::RGB32F>,
MortonCopy<false, PixelFormat::SRGBA8>,
MortonCopy<false, PixelFormat::RGBA8_SRGB>,
MortonCopy<false, PixelFormat::RG8U>,
MortonCopy<false, PixelFormat::RG8S>,
MortonCopy<false, PixelFormat::RG32UI>,
@@ -499,6 +515,17 @@ static constexpr GLConversionArray gl_to_morton_fns = {
nullptr,
nullptr,
nullptr,
MortonCopy<false, PixelFormat::BGRA8_SRGB>,
MortonCopy<false, PixelFormat::DXT1_SRGB>,
MortonCopy<false, PixelFormat::DXT23_SRGB>,
MortonCopy<false, PixelFormat::DXT45_SRGB>,
MortonCopy<false, PixelFormat::BC7U_SRGB>,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
MortonCopy<false, PixelFormat::Z32F>,
MortonCopy<false, PixelFormat::Z16>,
MortonCopy<false, PixelFormat::Z24S8>,
@@ -508,34 +535,39 @@ static constexpr GLConversionArray gl_to_morton_fns = {
};
void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params,
std::vector<u8>& gl_buffer) {
u32 depth = params.depth;
if (params.target == SurfaceParams::SurfaceTarget::Texture2D) {
std::vector<u8>& gl_buffer, u32 mip_level) {
u32 depth = params.MipDepth(mip_level);
if (params.target == SurfaceTarget::Texture2D) {
// TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
depth = 1U;
}
if (params.is_layered) {
u64 offset = 0;
u64 offset = params.GetMipmapLevelOffset(mip_level);
u64 offset_gl = 0;
u64 layer_size = params.LayerMemorySize();
u64 gl_size = params.LayerSizeGL();
for (u32 i = 0; i < depth; i++) {
u64 gl_size = params.LayerSizeGL(mip_level);
for (u32 i = 0; i < params.depth; i++) {
functions[static_cast<std::size_t>(params.pixel_format)](
params.width, params.block_height, params.height, params.block_depth, 1,
params.MipWidth(mip_level), params.MipBlockHeight(mip_level),
params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1,
gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
offset += layer_size;
offset_gl += gl_size;
}
} else {
u64 offset = params.GetMipmapLevelOffset(mip_level);
functions[static_cast<std::size_t>(params.pixel_format)](
params.width, params.block_height, params.height, params.block_depth, depth,
gl_buffer.data(), gl_buffer.size(), params.addr);
params.MipWidth(mip_level), params.MipBlockHeight(mip_level),
params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(),
gl_buffer.size(), params.addr + offset);
}
}
MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64));
static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0,
GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
MICROPROFILE_SCOPE(OpenGL_BlitSurface);
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
@@ -546,19 +578,21 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
OpenGLState state;
state.draw.read_framebuffer = read_fb_handle;
state.draw.draw_framebuffer = draw_fb_handle;
// Set sRGB enabled if the destination surfaces need it
state.framebuffer_srgb.enabled = dst_params.srgb_conversion;
state.Apply();
u32 buffers{};
if (src_params.type == SurfaceType::ColorTexture) {
switch (src_params.target) {
case SurfaceParams::SurfaceTarget::Texture2D:
case SurfaceTarget::Texture2D:
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
GL_TEXTURE_2D, src_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
0, 0);
break;
case SurfaceParams::SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubemap:
glFramebufferTexture2D(
GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
@@ -567,12 +601,12 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
break;
case SurfaceParams::SurfaceTarget::Texture2DArray:
case SurfaceTarget::Texture2DArray:
glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
src_surface->Texture().handle, 0, 0);
glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
break;
case SurfaceParams::SurfaceTarget::Texture3D:
case SurfaceTarget::Texture3D:
glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
SurfaceTargetToGL(src_params.target),
src_surface->Texture().handle, 0, 0);
@@ -588,13 +622,13 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
}
switch (dst_params.target) {
case SurfaceParams::SurfaceTarget::Texture2D:
case SurfaceTarget::Texture2D:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
0, 0);
break;
case SurfaceParams::SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubemap:
glFramebufferTexture2D(
GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
@@ -603,13 +637,13 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
break;
case SurfaceParams::SurfaceTarget::Texture2DArray:
case SurfaceTarget::Texture2DArray:
glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
dst_surface->Texture().handle, 0, 0);
glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
break;
case SurfaceParams::SurfaceTarget::Texture3D:
case SurfaceTarget::Texture3D:
glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
SurfaceTargetToGL(dst_params.target),
dst_surface->Texture().handle, 0, 0);
@@ -673,9 +707,11 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa
0, 0, width, height, 1);
}
MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
GLuint copy_pbo_handle, GLenum src_attachment = 0,
GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
MICROPROFILE_SCOPE(OpenGL_CopySurface);
ASSERT_MSG(dst_attachment == 0, "Unimplemented");
const auto& src_params{src_surface->GetSurfaceParams()};
@@ -730,21 +766,22 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
UNREACHABLE();
} else {
switch (dst_params.target) {
case SurfaceParams::SurfaceTarget::Texture1D:
case SurfaceTarget::Texture1D:
glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format,
dest_format.type, nullptr);
break;
case SurfaceParams::SurfaceTarget::Texture2D:
case SurfaceTarget::Texture2D:
glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height,
dest_format.format, dest_format.type, nullptr);
break;
case SurfaceParams::SurfaceTarget::Texture3D:
case SurfaceParams::SurfaceTarget::Texture2DArray:
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height,
static_cast<GLsizei>(dst_params.depth), dest_format.format,
dest_format.type, nullptr);
break;
case SurfaceParams::SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubemap:
glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0,
static_cast<GLint>(cubemap_face), width, height, 1,
dest_format.format, dest_format.type, nullptr);
@@ -781,35 +818,43 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
if (!format_tuple.compressed) {
// Only pre-create the texture for non-compressed textures.
switch (params.target) {
case SurfaceParams::SurfaceTarget::Texture1D:
glTexStorage1D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
rect.GetWidth());
case SurfaceTarget::Texture1D:
glTexStorage1D(SurfaceTargetToGL(params.target), params.max_mip_level,
format_tuple.internal_format, rect.GetWidth());
break;
case SurfaceParams::SurfaceTarget::Texture2D:
case SurfaceParams::SurfaceTarget::TextureCubemap:
glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
rect.GetWidth(), rect.GetHeight());
case SurfaceTarget::Texture2D:
case SurfaceTarget::TextureCubemap:
glTexStorage2D(SurfaceTargetToGL(params.target), params.max_mip_level,
format_tuple.internal_format, rect.GetWidth(), rect.GetHeight());
break;
case SurfaceParams::SurfaceTarget::Texture3D:
case SurfaceParams::SurfaceTarget::Texture2DArray:
glTexStorage3D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
rect.GetWidth(), rect.GetHeight(), params.depth);
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTexStorage3D(SurfaceTargetToGL(params.target), params.max_mip_level,
format_tuple.internal_format, rect.GetWidth(), rect.GetHeight(),
params.depth);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glTexStorage2D(GL_TEXTURE_2D, 1, format_tuple.internal_format, rect.GetWidth(),
rect.GetHeight());
glTexStorage2D(GL_TEXTURE_2D, params.max_mip_level, format_tuple.internal_format,
rect.GetWidth(), rect.GetHeight());
}
}
glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAX_LEVEL,
params.max_mip_level - 1);
if (params.max_mip_level == 1) {
glTexParameterf(SurfaceTargetToGL(params.target), GL_TEXTURE_LOD_BIAS, 1000.0);
}
VideoCore::LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
SurfaceParams::SurfaceTargetName(params.target));
LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
SurfaceParams::SurfaceTargetName(params.target));
// Clamp size to mapped GPU memory region
// TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
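The constructor changes above pre-allocate immutable storage for every mip level, clamp GL_TEXTURE_MAX_LEVEL to match, and bias the LOD for single-level surfaces. A hedged sketch of that allocation pattern for the 2D case, assuming a glad-style GL 4.2+ loader is initialized and a context is current (Allocate2DStorage is a hypothetical helper, not part of this diff):

#include <glad/glad.h> // assumption: any GL 4.2+ function loader works here

// Allocate immutable storage for `levels` mips and restrict sampling to them.
void Allocate2DStorage(GLenum target, GLuint texture, GLenum internal_format,
                       GLsizei width, GLsizei height, GLsizei levels) {
    glBindTexture(target, texture);
    glTexStorage2D(target, levels, internal_format, width, height);
    // Keep filtering/sampling within the levels that were actually allocated.
    glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, levels - 1);
    if (levels == 1) {
        // Mirrors the workaround above: bias the LOD so only the base level is sampled.
        glTexParameterf(target, GL_TEXTURE_LOD_BIAS, 1000.0f);
    }
}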
@@ -839,7 +884,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bo
S8Z24 s8z24_pixel{};
Z24S8 z24s8_pixel{};
constexpr auto bpp{SurfaceParams::GetBytesPerPixel(PixelFormat::S8Z24)};
constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
for (std::size_t y = 0; y < height; ++y) {
for (std::size_t x = 0; x < width; ++x) {
const std::size_t offset{bpp * (y * width + x)};
@@ -859,7 +904,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bo
}
static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
constexpr auto bpp{SurfaceParams::GetBytesPerPixel(PixelFormat::G8R8U)};
constexpr auto bpp{GetBytesPerPixel(PixelFormat::G8R8U)};
for (std::size_t y = 0; y < height; ++y) {
for (std::size_t x = 0; x < width; ++x) {
const std::size_t offset{bpp * (y * width + x)};
@@ -876,17 +921,24 @@ static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
* typical desktop GPUs.
*/
static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
u32 width, u32 height) {
u32 width, u32 height, u32 depth) {
switch (pixel_format) {
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::ASTC_2D_8X8:
case PixelFormat::ASTC_2D_8X5:
case PixelFormat::ASTC_2D_5X4: {
case PixelFormat::ASTC_2D_5X4:
case PixelFormat::ASTC_2D_5X5:
case PixelFormat::ASTC_2D_4X4_SRGB:
case PixelFormat::ASTC_2D_8X8_SRGB:
case PixelFormat::ASTC_2D_8X5_SRGB:
case PixelFormat::ASTC_2D_5X4_SRGB:
case PixelFormat::ASTC_2D_5X5_SRGB: {
// Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
u32 block_width{};
u32 block_height{};
std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
data =
Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
break;
}
case PixelFormat::S8Z24:
@@ -913,7 +965,9 @@ static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelForm
case PixelFormat::G8R8U:
case PixelFormat::G8R8S:
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::ASTC_2D_8X8: {
case PixelFormat::ASTC_2D_8X8:
case PixelFormat::ASTC_2D_4X4_SRGB:
case PixelFormat::ASTC_2D_8X8_SRGB: {
LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
static_cast<u32>(pixel_format));
UNREACHABLE();
@@ -926,23 +980,25 @@ static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelForm
}
}
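ConvertFormatAsNeeded_LoadGLBuffer above now decodes all ASTC variants, including the new sRGB ones, to RGBA8 before upload, using a block footprint derived from the format. GetASTCBlockSize itself is not shown in this diff, but the block dimensions are encoded in the format names; a plausible, purely illustrative sketch of such a lookup:

#include <cstdint>
#include <utility>

enum class AstcFormat {
    ASTC_2D_4X4, ASTC_2D_5X4, ASTC_2D_5X5, ASTC_2D_8X5, ASTC_2D_8X8,
    ASTC_2D_4X4_SRGB, ASTC_2D_5X4_SRGB, ASTC_2D_5X5_SRGB, ASTC_2D_8X5_SRGB, ASTC_2D_8X8_SRGB,
};

// Block footprint in texels; sRGB variants share the footprint of their UNORM counterparts.
constexpr std::pair<std::uint32_t, std::uint32_t> AstcBlockSize(AstcFormat format) {
    switch (format) {
    case AstcFormat::ASTC_2D_4X4:
    case AstcFormat::ASTC_2D_4X4_SRGB:
        return {4, 4};
    case AstcFormat::ASTC_2D_5X4:
    case AstcFormat::ASTC_2D_5X4_SRGB:
        return {5, 4};
    case AstcFormat::ASTC_2D_5X5:
    case AstcFormat::ASTC_2D_5X5_SRGB:
        return {5, 5};
    case AstcFormat::ASTC_2D_8X5:
    case AstcFormat::ASTC_2D_8X5_SRGB:
        return {8, 5};
    case AstcFormat::ASTC_2D_8X8:
    case AstcFormat::ASTC_2D_8X8_SRGB:
        return {8, 8};
    }
    return {4, 4};
}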
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
void CachedSurface::LoadGLBuffer() {
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
gl_buffer.resize(params.size_in_bytes_gl);
gl_buffer.resize(params.max_mip_level);
for (u32 i = 0; i < params.max_mip_level; i++)
gl_buffer[i].resize(params.GetMipmapSizeGL(i));
if (params.is_tiled) {
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
params.block_width, static_cast<u32>(params.target));
SwizzleFunc(morton_to_gl_fns, params, gl_buffer);
for (u32 i = 0; i < params.max_mip_level; i++)
SwizzleFunc(morton_to_gl_fns, params, gl_buffer[i], i);
} else {
const auto texture_src_data{Memory::GetPointer(params.addr)};
const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
gl_buffer.assign(texture_src_data, texture_src_data_end);
gl_buffer[0].assign(texture_src_data, texture_src_data_end);
}
ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height);
for (u32 i = 0; i < params.max_mip_level; i++)
ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
params.MipHeight(i), params.MipDepth(i));
}
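LoadGLBuffer now keeps one staging buffer per mip level, each sized via GetMipmapSizeGL. As a quick sanity check of the arithmetic for an uncompressed, non-layered surface, assuming each level is simply max(1, dim >> level) texels per axis times bytes per pixel (a simplification of the real size helpers):

#include <algorithm>
#include <cstdint>
#include <cstdio>

constexpr std::uint32_t MipDim(std::uint32_t dim, std::uint32_t level) {
    return std::max<std::uint32_t>(1u, dim >> level);
}

constexpr std::size_t MipLevelBytes(std::uint32_t width, std::uint32_t height,
                                    std::uint32_t bytes_per_pixel, std::uint32_t level) {
    return std::size_t{MipDim(width, level)} * MipDim(height, level) * bytes_per_pixel;
}

int main() {
    // 256x256 RGBA8 with max_mip_level = 3 -> 256 KiB, 64 KiB and 16 KiB staging buffers.
    for (std::uint32_t level = 0; level < 3; ++level) {
        std::printf("level %u: %zu bytes\n", level, MipLevelBytes(256, 256, 4, level));
    }
}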
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
@@ -952,18 +1008,19 @@ void CachedSurface::FlushGLBuffer() {
ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");
// OpenGL temporary buffer needs to be big enough to store raw texture size
gl_buffer.resize(GetSizeInBytes());
gl_buffer.resize(1);
gl_buffer[0].resize(GetSizeInBytes());
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
ASSERT(params.width * SurfaceParams::GetBytesPerPixel(params.pixel_format) % 4 == 0);
ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0);
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
ASSERT(!tuple.compressed);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, gl_buffer.size(),
gl_buffer.data());
glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width,
ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width,
params.height);
ASSERT(params.type != SurfaceType::Fill);
const u8* const texture_src_data = Memory::GetPointer(params.addr);
@@ -972,28 +1029,23 @@ void CachedSurface::FlushGLBuffer() {
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
params.block_width, static_cast<u32>(params.target));
SwizzleFunc(gl_to_morton_fns, params, gl_buffer);
SwizzleFunc(gl_to_morton_fns, params, gl_buffer[0], 0);
} else {
std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer.data(), GetSizeInBytes());
std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes());
}
}
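The assert in FlushGLBuffer guards the default 4-byte GL pack alignment: glGetTextureImage writes rows padded to GL_PACK_ALIGNMENT, so a row byte length that is not a multiple of 4 would not land contiguously in gl_buffer[0]. A tiny illustration of the invariant (RowIsPackAligned is a hypothetical name):

#include <cstdint>

// True when tightly packed rows already satisfy the default GL_PACK_ALIGNMENT of 4.
// If this ever failed, the readback path would need glPixelStorei(GL_PACK_ALIGNMENT, 1)
// or explicit row padding before reading the texture back.
constexpr bool RowIsPackAligned(std::uint32_t width, std::uint32_t bytes_per_pixel) {
    return (width * bytes_per_pixel) % 4 == 0;
}

static_assert(RowIsPackAligned(1280, 4), "RGBA8 rows are always 4-byte aligned");
static_assert(!RowIsPackAligned(3, 1), "a 3-texel-wide R8 row would need realignment");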
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
if (params.type == SurfaceType::Fill)
return;
MICROPROFILE_SCOPE(OpenGL_TextureUL);
const auto& rect{params.GetRect()};
void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
GLuint draw_fb_handle) {
const auto& rect{params.GetRect(mip_map)};
// Load data from memory to the surface
const GLint x0 = static_cast<GLint>(rect.left);
const GLint y0 = static_cast<GLint>(rect.bottom);
std::size_t buffer_offset =
static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +
static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
static_cast<std::size_t>(x0)) *
SurfaceParams::GetBytesPerPixel(params.pixel_format);
GetBytesPerPixel(params.pixel_format);
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
const GLuint target_tex = texture.handle;
@@ -1009,89 +1061,120 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
cur_state.Apply();
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
ASSERT(params.width * SurfaceParams::GetBytesPerPixel(params.pixel_format) % 4 == 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.width));
ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
GLsizei image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
glActiveTexture(GL_TEXTURE0);
if (tuple.compressed) {
switch (params.target) {
case SurfaceParams::SurfaceTarget::Texture2D:
glCompressedTexImage2D(
SurfaceTargetToGL(params.target), 0, tuple.internal_format,
static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0,
static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
case SurfaceTarget::Texture2D:
glCompressedTexImage2D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), 0, image_size,
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceParams::SurfaceTarget::Texture3D:
case SurfaceParams::SurfaceTarget::Texture2DArray:
glCompressedTexImage3D(
SurfaceTargetToGL(params.target), 0, tuple.internal_format,
static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height),
static_cast<GLsizei>(params.depth), 0,
static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
case SurfaceTarget::Texture3D:
glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)),
static_cast<GLsizei>(params.MipDepth(mip_map)), 0, image_size,
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceParams::SurfaceTarget::TextureCubemap:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)),
static_cast<GLsizei>(params.depth), 0, image_size,
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::TextureCubemap: {
GLsizei layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
for (std::size_t face = 0; face < params.depth; ++face) {
glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face),
0, tuple.internal_format, static_cast<GLsizei>(params.width),
static_cast<GLsizei>(params.height), 0,
static_cast<GLsizei>(params.SizeInBytesCubeFaceGL()),
&gl_buffer[buffer_offset]);
buffer_offset += params.SizeInBytesCubeFace();
mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
layer_size, &gl_buffer[mip_map][buffer_offset]);
buffer_offset += layer_size;
}
break;
}
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glCompressedTexImage2D(
GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
static_cast<GLsizei>(params.height), 0,
static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
glCompressedTexImage2D(GL_TEXTURE_2D, mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
static_cast<GLsizei>(params.size_in_bytes_gl),
&gl_buffer[mip_map][buffer_offset]);
}
} else {
switch (params.target) {
case SurfaceParams::SurfaceTarget::Texture1D:
glTexSubImage1D(SurfaceTargetToGL(params.target), 0, x0,
case SurfaceTarget::Texture1D:
glTexSubImage1D(SurfaceTargetToGL(params.target), mip_map, x0,
static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type,
&gl_buffer[buffer_offset]);
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceParams::SurfaceTarget::Texture2D:
glTexSubImage2D(SurfaceTargetToGL(params.target), 0, x0, y0,
case SurfaceTarget::Texture2D:
glTexSubImage2D(SurfaceTargetToGL(params.target), mip_map, x0, y0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[buffer_offset]);
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceParams::SurfaceTarget::Texture3D:
case SurfaceParams::SurfaceTarget::Texture2DArray:
glTexSubImage3D(SurfaceTargetToGL(params.target), 0, x0, y0, 0,
case SurfaceTarget::Texture3D:
glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
tuple.type, &gl_buffer[buffer_offset]);
tuple.type, &gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceParams::SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubemap: {
std::size_t start = buffer_offset;
for (std::size_t face = 0; face < params.depth; ++face) {
glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 0, x0,
y0, static_cast<GLsizei>(rect.GetWidth()),
glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), mip_map,
x0, y0, static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[buffer_offset]);
buffer_offset += params.SizeInBytesCubeFace();
&gl_buffer[mip_map][buffer_offset]);
buffer_offset += params.LayerSizeGL(mip_map);
}
break;
}
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
glTexSubImage2D(GL_TEXTURE_2D, mip_map, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[buffer_offset]);
&gl_buffer[mip_map][buffer_offset]);
}
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
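For cubemaps, UploadGLMipmapTexture walks the six faces in order, advancing the source offset by LayerSizeGL(mip_map) per face and targeting the consecutive GL_TEXTURE_CUBE_MAP_POSITIVE_X + face enums. The offset arithmetic in isolation (FaceOffset is a hypothetical helper):

#include <cstddef>
#include <cstdint>

// Byte offset of one cube face within a mip level's staging buffer, given that
// faces are stored back to back and each occupies layer_size_gl bytes.
constexpr std::size_t FaceOffset(std::size_t base_offset, std::size_t layer_size_gl,
                                 std::uint32_t face) {
    return base_offset + layer_size_gl * face;
}

static_assert(FaceOffset(0, 4096, 0) == 0, "face +X starts at the base offset");
static_assert(FaceOffset(0, 4096, 5) == 20480, "face -Z starts after five full faces");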
RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
if (params.type == SurfaceType::Fill)
return;
MICROPROFILE_SCOPE(OpenGL_TextureUL);
for (u32 i = 0; i < params.max_mip_level; i++)
UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
}
RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
: RasterizerCache{rasterizer} {
read_framebuffer.Create();
draw_framebuffer.Create();
copy_pbo.Create();
@@ -1231,8 +1314,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
// For compatible surfaces, we can just do fast glCopyImageSubData based copy
if (old_params.target == new_params.target && old_params.type == new_params.type &&
old_params.depth == new_params.depth && old_params.depth == 1 &&
SurfaceParams::GetFormatBpp(old_params.pixel_format) ==
SurfaceParams::GetFormatBpp(new_params.pixel_format)) {
GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format)) {
FastCopySurface(old_surface, new_surface);
return new_surface;
}
@@ -1245,15 +1327,16 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
const bool is_blit{old_params.pixel_format == new_params.pixel_format};
switch (new_params.target) {
case SurfaceParams::SurfaceTarget::Texture2D:
case SurfaceTarget::Texture2D:
if (is_blit) {
BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle);
} else {
CopySurface(old_surface, new_surface, copy_pbo.handle);
}
break;
case SurfaceParams::SurfaceTarget::TextureCubemap:
case SurfaceParams::SurfaceTarget::Texture3D:
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::Texture3D:
case SurfaceTarget::TextureCubeArray:
AccurateCopySurface(old_surface, new_surface);
break;
default:
@@ -1263,7 +1346,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
}
return new_surface;
} // namespace OpenGL
}
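RecreateSurface picks between three strategies: a glCopyImageSubData fast path for same-target, same-bpp, non-layered surfaces; a framebuffer blit when the pixel formats match; and a PBO copy or an accurate CPU-side copy otherwise, with cubemaps, 3D textures and cube arrays always taking the accurate path. A compressed sketch of that decision, using hypothetical enum and struct names:

#include <cstdint>

enum class Target { Texture2D, Texture3D, TextureCubemap, TextureCubeArray };
enum class Strategy { FastCopy, Blit, PboCopy, AccurateCopy };

struct Params {
    Target target;
    std::uint32_t depth;
    std::uint32_t bpp;
    std::uint32_t pixel_format;
};

// Mirrors the ordering above: fast copy first, then per-target blit/copy selection.
Strategy ChooseRecreateStrategy(const Params& old_p, const Params& new_p) {
    if (old_p.target == new_p.target && old_p.depth == new_p.depth && old_p.depth == 1 &&
        old_p.bpp == new_p.bpp) {
        return Strategy::FastCopy;
    }
    switch (new_p.target) {
    case Target::Texture2D:
        return old_p.pixel_format == new_p.pixel_format ? Strategy::Blit : Strategy::PboCopy;
    case Target::Texture3D:
    case Target::TextureCubemap:
    case Target::TextureCubeArray:
        return Strategy::AccurateCopy;
    }
    return Strategy::AccurateCopy;
}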
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const {
return TryGet(addr);



@@ -7,6 +7,7 @@
#include <array>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "common/alignment.h"
@@ -18,6 +19,7 @@
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
@@ -27,126 +29,12 @@ class CachedSurface;
using Surface = std::shared_ptr<CachedSurface>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
using SurfaceType = VideoCore::Surface::SurfaceType;
using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;
struct SurfaceParams {
enum class PixelFormat {
ABGR8U = 0,
ABGR8S = 1,
ABGR8UI = 2,
B5G6R5U = 3,
A2B10G10R10U = 4,
A1B5G5R5U = 5,
R8U = 6,
R8UI = 7,
RGBA16F = 8,
RGBA16U = 9,
RGBA16UI = 10,
R11FG11FB10F = 11,
RGBA32UI = 12,
DXT1 = 13,
DXT23 = 14,
DXT45 = 15,
DXN1 = 16, // This is also known as BC4
DXN2UNORM = 17,
DXN2SNORM = 18,
BC7U = 19,
BC6H_UF16 = 20,
BC6H_SF16 = 21,
ASTC_2D_4X4 = 22,
G8R8U = 23,
G8R8S = 24,
BGRA8 = 25,
RGBA32F = 26,
RG32F = 27,
R32F = 28,
R16F = 29,
R16U = 30,
R16S = 31,
R16UI = 32,
R16I = 33,
RG16 = 34,
RG16F = 35,
RG16UI = 36,
RG16I = 37,
RG16S = 38,
RGB32F = 39,
SRGBA8 = 40,
RG8U = 41,
RG8S = 42,
RG32UI = 43,
R32UI = 44,
ASTC_2D_8X8 = 45,
ASTC_2D_8X5 = 46,
ASTC_2D_5X4 = 47,
MaxColorFormat,
// Depth formats
Z32F = 48,
Z16 = 49,
MaxDepthFormat,
// DepthStencil formats
Z24S8 = 50,
S8Z24 = 51,
Z32FS8 = 52,
MaxDepthStencilFormat,
Max = MaxDepthStencilFormat,
Invalid = 255,
};
static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
enum class ComponentType {
Invalid = 0,
SNorm = 1,
UNorm = 2,
SInt = 3,
UInt = 4,
Float = 5,
};
enum class SurfaceType {
ColorTexture = 0,
Depth = 1,
DepthStencil = 2,
Fill = 3,
Invalid = 4,
};
enum class SurfaceTarget {
Texture1D,
Texture2D,
Texture3D,
Texture1DArray,
Texture2DArray,
TextureCubemap,
};
static SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type) {
switch (texture_type) {
case Tegra::Texture::TextureType::Texture1D:
return SurfaceTarget::Texture1D;
case Tegra::Texture::TextureType::Texture2D:
case Tegra::Texture::TextureType::Texture2DNoMipmap:
return SurfaceTarget::Texture2D;
case Tegra::Texture::TextureType::Texture3D:
return SurfaceTarget::Texture3D;
case Tegra::Texture::TextureType::TextureCubemap:
return SurfaceTarget::TextureCubemap;
case Tegra::Texture::TextureType::Texture1DArray:
return SurfaceTarget::Texture1DArray;
case Tegra::Texture::TextureType::Texture2DArray:
return SurfaceTarget::Texture2DArray;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", static_cast<u32>(texture_type));
UNREACHABLE();
return SurfaceTarget::Texture2D;
}
}
static std::string SurfaceTargetName(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
@@ -161,6 +49,8 @@ struct SurfaceParams {
return "Texture2DArray";
case SurfaceTarget::TextureCubemap:
return "TextureCubemap";
case SurfaceTarget::TextureCubeArray:
return "TextureCubeArray";
default:
LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
UNREACHABLE();
@@ -168,569 +58,12 @@ struct SurfaceParams {
}
}
static bool SurfaceTargetIsLayered(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::Texture2D:
case SurfaceTarget::Texture3D:
return false;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubemap:
return true;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
UNREACHABLE();
return false;
}
}
/**
* Gets the compression factor for the specified PixelFormat. This applies to just the
* "compressed width" and "compressed height", not the overall compression factor of a
* compressed image. This is used for maintaining proper surface sizes for compressed
* texture formats.
*/
static constexpr u32 GetCompressionFactor(PixelFormat format) {
if (format == PixelFormat::Invalid)
return 0;
constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{
1, // ABGR8U
1, // ABGR8S
1, // ABGR8UI
1, // B5G6R5U
1, // A2B10G10R10U
1, // A1B5G5R5U
1, // R8U
1, // R8UI
1, // RGBA16F
1, // RGBA16U
1, // RGBA16UI
1, // R11FG11FB10F
1, // RGBA32UI
4, // DXT1
4, // DXT23
4, // DXT45
4, // DXN1
4, // DXN2UNORM
4, // DXN2SNORM
4, // BC7U
4, // BC6H_UF16
4, // BC6H_SF16
4, // ASTC_2D_4X4
1, // G8R8U
1, // G8R8S
1, // BGRA8
1, // RGBA32F
1, // RG32F
1, // R32F
1, // R16F
1, // R16U
1, // R16S
1, // R16UI
1, // R16I
1, // RG16
1, // RG16F
1, // RG16UI
1, // RG16I
1, // RG16S
1, // RGB32F
1, // SRGBA8
1, // RG8U
1, // RG8S
1, // RG32UI
1, // R32UI
4, // ASTC_2D_8X8
4, // ASTC_2D_8X5
4, // ASTC_2D_5X4
1, // Z32F
1, // Z16
1, // Z24S8
1, // S8Z24
1, // Z32FS8
}};
ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size());
return compression_factor_table[static_cast<std::size_t>(format)];
}
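The compression factor above shrinks the width and height used for size math, while the bpp table below charges the full bit cost of one compressed block. Assuming the raw size is (width / factor) * (height / factor) * bpp / 8 (the actual SizeInBytesRaw body is not shown in this diff), a worked example for DXT1 and ABGR8U:

#include <cstdint>

// Hypothetical restatement of the size arithmetic implied by the two tables.
constexpr std::size_t RawSizeBytes(std::uint32_t width, std::uint32_t height,
                                   std::uint32_t compression_factor, std::uint32_t bpp) {
    return static_cast<std::size_t>(width / compression_factor) *
           (height / compression_factor) * bpp / 8;
}

// DXT1: factor 4, 64 bits per 4x4 block -> a 256x256 surface is 64x64 blocks = 32 KiB.
static_assert(RawSizeBytes(256, 256, 4, 64) == 32 * 1024, "");
// ABGR8U: factor 1, 32 bpp -> 256x256 is 256 KiB.
static_assert(RawSizeBytes(256, 256, 1, 32) == 256 * 1024, "");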
static constexpr u32 GetFormatBpp(PixelFormat format) {
if (format == PixelFormat::Invalid)
return 0;
constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
32, // ABGR8U
32, // ABGR8S
32, // ABGR8UI
16, // B5G6R5U
32, // A2B10G10R10U
16, // A1B5G5R5U
8, // R8U
8, // R8UI
64, // RGBA16F
64, // RGBA16U
64, // RGBA16UI
32, // R11FG11FB10F
128, // RGBA32UI
64, // DXT1
128, // DXT23
128, // DXT45
64, // DXN1
128, // DXN2UNORM
128, // DXN2SNORM
128, // BC7U
128, // BC6H_UF16
128, // BC6H_SF16
32, // ASTC_2D_4X4
16, // G8R8U
16, // G8R8S
32, // BGRA8
128, // RGBA32F
64, // RG32F
32, // R32F
16, // R16F
16, // R16U
16, // R16S
16, // R16UI
16, // R16I
32, // RG16
32, // RG16F
32, // RG16UI
32, // RG16I
32, // RG16S
96, // RGB32F
32, // SRGBA8
16, // RG8U
16, // RG8S
64, // RG32UI
32, // R32UI
16, // ASTC_2D_8X8
32, // ASTC_2D_8X5
32, // ASTC_2D_5X4
32, // Z32F
16, // Z16
32, // Z24S8
32, // S8Z24
64, // Z32FS8
}};
ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
return bpp_table[static_cast<std::size_t>(format)];
}
u32 GetFormatBpp() const {
return GetFormatBpp(pixel_format);
}
static PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
switch (format) {
case Tegra::DepthFormat::S8_Z24_UNORM:
return PixelFormat::S8Z24;
case Tegra::DepthFormat::Z24_S8_UNORM:
return PixelFormat::Z24S8;
case Tegra::DepthFormat::Z32_FLOAT:
return PixelFormat::Z32F;
case Tegra::DepthFormat::Z16_UNORM:
return PixelFormat::Z16;
case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
return PixelFormat::Z32FS8;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
switch (format) {
// TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
// gamma.
case Tegra::RenderTargetFormat::RGBA8_SRGB:
case Tegra::RenderTargetFormat::RGBA8_UNORM:
return PixelFormat::ABGR8U;
case Tegra::RenderTargetFormat::RGBA8_SNORM:
return PixelFormat::ABGR8S;
case Tegra::RenderTargetFormat::RGBA8_UINT:
return PixelFormat::ABGR8UI;
case Tegra::RenderTargetFormat::BGRA8_SRGB:
case Tegra::RenderTargetFormat::BGRA8_UNORM:
return PixelFormat::BGRA8;
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
return PixelFormat::A2B10G10R10U;
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
return PixelFormat::RGBA16F;
case Tegra::RenderTargetFormat::RGBA16_UNORM:
return PixelFormat::RGBA16U;
case Tegra::RenderTargetFormat::RGBA16_UINT:
return PixelFormat::RGBA16UI;
case Tegra::RenderTargetFormat::RGBA32_FLOAT:
return PixelFormat::RGBA32F;
case Tegra::RenderTargetFormat::RG32_FLOAT:
return PixelFormat::RG32F;
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
return PixelFormat::R11FG11FB10F;
case Tegra::RenderTargetFormat::B5G6R5_UNORM:
return PixelFormat::B5G6R5U;
case Tegra::RenderTargetFormat::BGR5A1_UNORM:
return PixelFormat::A1B5G5R5U;
case Tegra::RenderTargetFormat::RGBA32_UINT:
return PixelFormat::RGBA32UI;
case Tegra::RenderTargetFormat::R8_UNORM:
return PixelFormat::R8U;
case Tegra::RenderTargetFormat::R8_UINT:
return PixelFormat::R8UI;
case Tegra::RenderTargetFormat::RG16_FLOAT:
return PixelFormat::RG16F;
case Tegra::RenderTargetFormat::RG16_UINT:
return PixelFormat::RG16UI;
case Tegra::RenderTargetFormat::RG16_SINT:
return PixelFormat::RG16I;
case Tegra::RenderTargetFormat::RG16_UNORM:
return PixelFormat::RG16;
case Tegra::RenderTargetFormat::RG16_SNORM:
return PixelFormat::RG16S;
case Tegra::RenderTargetFormat::RG8_UNORM:
return PixelFormat::RG8U;
case Tegra::RenderTargetFormat::RG8_SNORM:
return PixelFormat::RG8S;
case Tegra::RenderTargetFormat::R16_FLOAT:
return PixelFormat::R16F;
case Tegra::RenderTargetFormat::R16_UNORM:
return PixelFormat::R16U;
case Tegra::RenderTargetFormat::R16_SNORM:
return PixelFormat::R16S;
case Tegra::RenderTargetFormat::R16_UINT:
return PixelFormat::R16UI;
case Tegra::RenderTargetFormat::R16_SINT:
return PixelFormat::R16I;
case Tegra::RenderTargetFormat::R32_FLOAT:
return PixelFormat::R32F;
case Tegra::RenderTargetFormat::R32_UINT:
return PixelFormat::R32UI;
case Tegra::RenderTargetFormat::RG32_UINT:
return PixelFormat::RG32UI;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
Tegra::Texture::ComponentType component_type) {
// TODO(Subv): Properly implement this
switch (format) {
case Tegra::Texture::TextureFormat::A8R8G8B8:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::ABGR8U;
case Tegra::Texture::ComponentType::SNORM:
return PixelFormat::ABGR8S;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::ABGR8UI;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::B5G6R5:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::B5G6R5U;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::A2B10G10R10:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::A2B10G10R10U;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::A1B5G5R5:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::A1B5G5R5U;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::R8:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::R8U;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::R8UI;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::G8R8:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::G8R8U;
case Tegra::Texture::ComponentType::SNORM:
return PixelFormat::G8R8S;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::RGBA16U;
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RGBA16F;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::BF10GF11RF11:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::R11FG11FB10F;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RGBA32F;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::RGBA32UI;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::R32_G32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RG32F;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::RG32UI;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::R32_G32_B32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RGB32F;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::R16:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::R16F;
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::R16U;
case Tegra::Texture::ComponentType::SNORM:
return PixelFormat::R16S;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::R16UI;
case Tegra::Texture::ComponentType::SINT:
return PixelFormat::R16I;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::R32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::R32F;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::R32UI;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::ZF32:
return PixelFormat::Z32F;
case Tegra::Texture::TextureFormat::Z16:
return PixelFormat::Z16;
case Tegra::Texture::TextureFormat::Z24S8:
return PixelFormat::Z24S8;
case Tegra::Texture::TextureFormat::DXT1:
return PixelFormat::DXT1;
case Tegra::Texture::TextureFormat::DXT23:
return PixelFormat::DXT23;
case Tegra::Texture::TextureFormat::DXT45:
return PixelFormat::DXT45;
case Tegra::Texture::TextureFormat::DXN1:
return PixelFormat::DXN1;
case Tegra::Texture::TextureFormat::DXN2:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::DXN2UNORM;
case Tegra::Texture::ComponentType::SNORM:
return PixelFormat::DXN2SNORM;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::BC7U:
return PixelFormat::BC7U;
case Tegra::Texture::TextureFormat::BC6H_UF16:
return PixelFormat::BC6H_UF16;
case Tegra::Texture::TextureFormat::BC6H_SF16:
return PixelFormat::BC6H_SF16;
case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
return PixelFormat::ASTC_2D_4X4;
case Tegra::Texture::TextureFormat::ASTC_2D_5X4:
return PixelFormat::ASTC_2D_5X4;
case Tegra::Texture::TextureFormat::ASTC_2D_8X8:
return PixelFormat::ASTC_2D_8X8;
case Tegra::Texture::TextureFormat::ASTC_2D_8X5:
return PixelFormat::ASTC_2D_8X5;
case Tegra::Texture::TextureFormat::R16_G16:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RG16F;
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::RG16;
case Tegra::Texture::ComponentType::SNORM:
return PixelFormat::RG16S;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::RG16UI;
case Tegra::Texture::ComponentType::SINT:
return PixelFormat::RG16I;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}",
static_cast<u32>(format), static_cast<u32>(component_type));
UNREACHABLE();
}
}
static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
// TODO(Subv): Implement more component types
switch (type) {
case Tegra::Texture::ComponentType::UNORM:
return ComponentType::UNorm;
case Tegra::Texture::ComponentType::FLOAT:
return ComponentType::Float;
case Tegra::Texture::ComponentType::SNORM:
return ComponentType::SNorm;
case Tegra::Texture::ComponentType::UINT:
return ComponentType::UInt;
case Tegra::Texture::ComponentType::SINT:
return ComponentType::SInt;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type));
UNREACHABLE();
}
}
static ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) {
// TODO(Subv): Implement more render targets
switch (format) {
case Tegra::RenderTargetFormat::RGBA8_UNORM:
case Tegra::RenderTargetFormat::RGBA8_SRGB:
case Tegra::RenderTargetFormat::BGRA8_UNORM:
case Tegra::RenderTargetFormat::BGRA8_SRGB:
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
case Tegra::RenderTargetFormat::R8_UNORM:
case Tegra::RenderTargetFormat::RG16_UNORM:
case Tegra::RenderTargetFormat::R16_UNORM:
case Tegra::RenderTargetFormat::B5G6R5_UNORM:
case Tegra::RenderTargetFormat::BGR5A1_UNORM:
case Tegra::RenderTargetFormat::RG8_UNORM:
case Tegra::RenderTargetFormat::RGBA16_UNORM:
return ComponentType::UNorm;
case Tegra::RenderTargetFormat::RGBA8_SNORM:
case Tegra::RenderTargetFormat::RG16_SNORM:
case Tegra::RenderTargetFormat::R16_SNORM:
case Tegra::RenderTargetFormat::RG8_SNORM:
return ComponentType::SNorm;
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
case Tegra::RenderTargetFormat::RGBA32_FLOAT:
case Tegra::RenderTargetFormat::RG32_FLOAT:
case Tegra::RenderTargetFormat::RG16_FLOAT:
case Tegra::RenderTargetFormat::R16_FLOAT:
case Tegra::RenderTargetFormat::R32_FLOAT:
return ComponentType::Float;
case Tegra::RenderTargetFormat::RGBA32_UINT:
case Tegra::RenderTargetFormat::RGBA16_UINT:
case Tegra::RenderTargetFormat::RG16_UINT:
case Tegra::RenderTargetFormat::R8_UINT:
case Tegra::RenderTargetFormat::R16_UINT:
case Tegra::RenderTargetFormat::RG32_UINT:
case Tegra::RenderTargetFormat::R32_UINT:
case Tegra::RenderTargetFormat::RGBA8_UINT:
return ComponentType::UInt;
case Tegra::RenderTargetFormat::RG16_SINT:
case Tegra::RenderTargetFormat::R16_SINT:
return ComponentType::SInt;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
return PixelFormat::ABGR8U;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
switch (format) {
case Tegra::DepthFormat::Z16_UNORM:
case Tegra::DepthFormat::S8_Z24_UNORM:
case Tegra::DepthFormat::Z24_S8_UNORM:
return ComponentType::UNorm;
case Tegra::DepthFormat::Z32_FLOAT:
case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
return ComponentType::Float;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
static SurfaceType GetFormatType(PixelFormat pixel_format) {
if (static_cast<std::size_t>(pixel_format) <
static_cast<std::size_t>(PixelFormat::MaxColorFormat)) {
return SurfaceType::ColorTexture;
}
if (static_cast<std::size_t>(pixel_format) <
static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) {
return SurfaceType::Depth;
}
if (static_cast<std::size_t>(pixel_format) <
static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {
return SurfaceType::DepthStencil;
}
// TODO(Subv): Implement the other formats
ASSERT(false);
return SurfaceType::Invalid;
}
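GetFormatType relies purely on the enum ordering: everything below MaxColorFormat is a color format, then depth formats up to MaxDepthFormat, then depth-stencil formats up to MaxDepthStencilFormat. A reduced sketch of the same sentinel-range technique, with a deliberately small stand-in enum:

enum class Fmt {
    Color0, Color1, MaxColorFormat,
    Depth0, MaxDepthFormat,
    DepthStencil0, MaxDepthStencilFormat,
};
enum class Kind { Color, Depth, DepthStencil };

constexpr Kind Classify(Fmt f) {
    if (f < Fmt::MaxColorFormat)
        return Kind::Color;
    if (f < Fmt::MaxDepthFormat)
        return Kind::Depth;
    return Kind::DepthStencil;
}

static_assert(Classify(Fmt::Color1) == Kind::Color, "");
static_assert(Classify(Fmt::Depth0) == Kind::Depth, "");
static_assert(Classify(Fmt::DepthStencil0) == Kind::DepthStencil, "");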
/// Returns the size in bytes of the specified pixel format
static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
if (pixel_format == SurfaceParams::PixelFormat::Invalid) {
return 0;
}
return GetFormatBpp(pixel_format) / CHAR_BIT;
return VideoCore::Surface::GetFormatBpp(pixel_format);
}
/// Returns the rectangle corresponding to this surface
MathUtil::Rectangle<u32> GetRect() const;
MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const;
/// Returns the total size of this surface in bytes, adjusted for compression
std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -761,7 +94,7 @@ struct SurfaceParams {
/// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps.
std::size_t MemorySize() const {
std::size_t size = InnerMemorySize(is_layered);
std::size_t size = InnerMemorySize(false, is_layered);
if (is_layered)
return size * depth;
return size;
@@ -770,12 +103,78 @@ struct SurfaceParams {
/// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
/// mipmaps.
std::size_t LayerMemorySize() const {
return InnerMemorySize(true);
return InnerMemorySize(false, true);
}
/// Returns the size of a layer of this surface in OpenGL.
std::size_t LayerSizeGL() const {
return SizeInBytesRaw(true) / depth;
std::size_t LayerSizeGL(u32 mip_level) const {
return InnerMipmapMemorySize(mip_level, true, is_layered, false);
}
std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const {
std::size_t size = InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed);
if (is_layered)
return size * depth;
return size;
}
std::size_t GetMipmapLevelOffset(u32 mip_level) const {
std::size_t offset = 0;
for (u32 i = 0; i < mip_level; i++)
offset += InnerMipmapMemorySize(i, false, is_layered);
return offset;
}
std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const {
std::size_t offset = 0;
for (u32 i = 0; i < mip_level; i++)
offset += InnerMipmapMemorySize(i, true, is_layered);
return offset;
}
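GetMipmapLevelOffset and its GL counterpart are just prefix sums of the per-level sizes: level N starts where levels 0 through N-1 end. The same idea in isolation, assuming a hypothetical LevelSize(i) callback:

#include <cstddef>
#include <cstdint>
#include <functional>

// Offset of mip level `level`, given the size of every preceding level.
std::size_t MipLevelOffset(std::uint32_t level,
                           const std::function<std::size_t(std::uint32_t)>& level_size) {
    std::size_t offset = 0;
    for (std::uint32_t i = 0; i < level; ++i) {
        offset += level_size(i);
    }
    return offset;
}

// Example: level sizes 262144, 65536, 16384 give offsets 0, 262144, 327680,
// so MipLevelOffset(2, ...) == 262144 + 65536 == 327680.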
u32 MipWidth(u32 mip_level) const {
return std::max(1U, width >> mip_level);
}
u32 MipHeight(u32 mip_level) const {
return std::max(1U, height >> mip_level);
}
u32 MipDepth(u32 mip_level) const {
return is_layered ? depth : std::max(1U, depth >> mip_level);
}
// Auto block resizing algorithm from:
// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
u32 MipBlockHeight(u32 mip_level) const {
if (mip_level == 0)
return block_height;
u32 alt_height = MipHeight(mip_level);
u32 h = GetDefaultBlockHeight(pixel_format);
u32 blocks_in_y = (alt_height + h - 1) / h;
u32 bh = 16;
while (bh > 1 && blocks_in_y <= bh * 4) {
bh >>= 1;
}
return bh;
}
u32 MipBlockDepth(u32 mip_level) const {
if (mip_level == 0)
return block_depth;
if (is_layered)
return 1;
u32 depth = MipDepth(mip_level);
u32 bd = 32;
while (bd > 1 && depth * 2 <= bd) {
bd >>= 1;
}
if (bd == 32) {
u32 bh = MipBlockHeight(mip_level);
if (bh >= 4)
return 16;
}
return bd;
}
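The nouveau-derived auto-sizing above halves the block height until fewer than bh*4 rows of blocks remain (or bh reaches 1). Two worked cases, assuming GetDefaultBlockHeight(pixel_format) returns 8 for the format in question (that helper lives outside this diff):

#include <cstdint>

// Same loop as MipBlockHeight above, with the default block height passed in explicitly.
constexpr std::uint32_t AutoBlockHeight(std::uint32_t mip_height, std::uint32_t default_block_h) {
    const std::uint32_t blocks_in_y = (mip_height + default_block_h - 1) / default_block_h;
    std::uint32_t bh = 16;
    while (bh > 1 && blocks_in_y <= bh * 4) {
        bh >>= 1;
    }
    return bh;
}

// 480-texel-high level: 60 rows of blocks -> 60 <= 64 shrinks 16 to 8, 60 > 32 stops -> 8.
static_assert(AutoBlockHeight(480, 8) == 8, "");
// 40-texel-high level: 5 rows of blocks -> shrinks all the way down to 1.
static_assert(AutoBlockHeight(40, 8) == 1, "");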
/// Creates SurfaceParams from a texture configuration
@@ -819,7 +218,7 @@ struct SurfaceParams {
SurfaceTarget target;
u32 max_mip_level;
bool is_layered;
bool srgb_conversion;
// Parameters used for caching
VAddr addr;
Tegra::GPUVAddr gpu_addr;
@@ -836,7 +235,10 @@ struct SurfaceParams {
} rt;
private:
std::size_t InnerMemorySize(bool layer_only = false) const;
std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false,
bool uncompressed = false) const;
std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false,
bool uncompressed = false) const;
};
}; // namespace OpenGL
@@ -862,6 +264,8 @@ struct hash<SurfaceReserveKey> {
namespace OpenGL {
class RasterizerOpenGL;
class CachedSurface final : public RasterizerCacheObject {
public:
CachedSurface(const SurfaceParams& params);
@@ -898,8 +302,10 @@ public:
void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
private:
void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
OGLTexture texture;
std::vector<u8> gl_buffer;
std::vector<std::vector<u8>> gl_buffer;
SurfaceParams params;
GLenum gl_target;
std::size_t cached_size_in_bytes;
@@ -907,7 +313,7 @@ private:
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
public:
RasterizerCacheOpenGL();
explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer);
/// Get a surface based on the texture configuration
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,

Some files were not shown because too many files have changed in this diff.