Compare commits

52 commits

| Author | SHA1 | Date |
|---|---|---|
|  | db42bcb306 |  |
|  | f35e09fe0d |  |
|  | dde0814837 |  |
|  | 9dbba9240b |  |
|  | 3bc815a5dc |  |
|  | 522957f9f3 |  |
|  | 47f2405ab1 |  |
|  | 595511876e |  |
|  | 8a24a804c5 |  |
|  | b93a8a368f |  |
|  | b26481c94b |  |
|  | c5d41fd812 |  |
|  | bd7ec1a749 |  |
|  | 7c4bc7b883 |  |
|  | 3f74518e19 |  |
|  | 1665b70cc6 |  |
|  | f08db7295a |  |
|  | 6af322a347 |  |
|  | 819dd93257 |  |
|  | e5893db3e6 |  |
|  | a7157fe27d |  |
|  | f3297d8cd1 |  |
|  | 733cf179b8 |  |
|  | 540235bb05 |  |
|  | 611f4666fd |  |
|  | 1cf90f4570 |  |
|  | 90e27ea003 |  |
|  | ee49e1fcb6 |  |
|  | 7b6d516faa |  |
|  | b78e7b3454 |  |
|  | c221308a66 |  |
|  | 2117edd0f8 |  |
|  | 5a5fccaa23 |  |
|  | 72837e4b3d |  |
|  | 3ae0de9b53 |  |
|  | 19330f45d3 |  |
|  | 197dcf0b5e |  |
|  | 21eb4cfa7f |  |
|  | 22d3dfbcd4 |  |
|  | 241563d15c |  |
|  | e6612d6d8d |  |
|  | 13bc74e957 |  |
|  | d71cad6ed0 |  |
|  | 555cd26ec2 |  |
|  | 5379063108 |  |
|  | 567134f874 |  |
|  | cb198d7985 |  |
|  | cd2921a047 |  |
|  | debc7442f2 |  |
|  | 73f2ee5484 |  |
|  | 597c00698d |  |
|  | ed82bb968a |  |
@@ -104,78 +104,12 @@ endif()
 message(STATUS "Target architecture: ${ARCHITECTURE}")

-# Configure compilation flags
+# Configure C++ standard
+# ===========================

 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)

-if (NOT MSVC)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
-
-    if (MINGW)
-        add_definitions(-DMINGW_HAS_SECURE_API)
-
-        if (MINGW_STATIC_BUILD)
-            add_definitions(-DQT_STATICPLUGIN)
-            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static")
-            set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
-        endif()
-    endif()
-else()
-    # Silence "deprecation" warnings
-    add_definitions(/D_CRT_SECURE_NO_WARNINGS /D_CRT_NONSTDC_NO_DEPRECATE /D_SCL_SECURE_NO_WARNINGS)
-    # Avoid windows.h junk
-    add_definitions(/DNOMINMAX)
-    # Avoid windows.h from including some usually unused libs like winsocks.h, since this might cause some redefinition errors.
-    add_definitions(/DWIN32_LEAN_AND_MEAN)
-
-    set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE STRING "" FORCE)
-
-    # Tweak optimization settings
-    # As far as I can tell, there's no way to override the CMake defaults while leaving user
-    # changes intact, so we'll just clobber everything and say sorry.
-    message(STATUS "Cache compiler flags ignored, please edit CMakeLists.txt to change the flags.")
-
-    # /W3 - Level 3 warnings
-    # /MP - Multi-threaded compilation
-    # /Zi - Output debugging information
-    # /Zo - enhanced debug info for optimized builds
-    # /permissive- - enables stricter C++ standards conformance checks
-    set(CMAKE_C_FLAGS "/W3 /MP /Zi /Zo /permissive-" CACHE STRING "" FORCE)
-    # /EHsc - C++-only exception handling semantics
-    # /Zc:throwingNew - let codegen assume `operator new` will never return null
-    # /Zc:inline - let codegen omit inline functions in object files
-    set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} /EHsc /std:c++latest /Zc:throwingNew,inline" CACHE STRING "" FORCE)
-
-    # /MDd - Multi-threaded Debug Runtime DLL
-    set(CMAKE_C_FLAGS_DEBUG "/Od /MDd" CACHE STRING "" FORCE)
-    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}" CACHE STRING "" FORCE)
-
-    # /O2 - Optimization level 2
-    # /GS- - No stack buffer overflow checks
-    # /MD - Multi-threaded runtime DLL
-    set(CMAKE_C_FLAGS_RELEASE "/O2 /GS- /MD" CACHE STRING "" FORCE)
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}" CACHE STRING "" FORCE)
-
-    set(CMAKE_EXE_LINKER_FLAGS_DEBUG "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
-    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
-endif()
-
-# Set file offset size to 64 bits.
-#
-# On modern Unixes, this is typically already the case. The lone exception is
-# glibc, which may default to 32 bits. glibc allows this to be configured
-# by setting _FILE_OFFSET_BITS.
-if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR MINGW)
-    add_definitions(-D_FILE_OFFSET_BITS=64)
-endif()
-
-# CMake seems to only define _DEBUG on Windows
-set_property(DIRECTORY APPEND PROPERTY
-    COMPILE_DEFINITIONS $<$<CONFIG:Debug>:_DEBUG> $<$<NOT:$<CONFIG:Debug>>:NDEBUG>)
-
 # System imported libraries
 # ======================
@@ -326,25 +260,21 @@ endif()
 # Platform-specific library requirements
 # ======================================

-IF (APPLE)
-    find_library(COCOA_LIBRARY Cocoa) # Umbrella framework for everything GUI-related
+if (APPLE)
+    # Umbrella framework for everything GUI-related
+    find_library(COCOA_LIBRARY Cocoa)
     set(PLATFORM_LIBRARIES ${COCOA_LIBRARY} ${IOKIT_LIBRARY} ${COREVIDEO_LIBRARY})
-
-    if (CMAKE_CXX_COMPILER_ID STREQUAL Clang)
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
-        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++")
-    endif()
-ELSEIF (WIN32)
+elseif (WIN32)
     # WSAPoll and SHGetKnownFolderPath (AppData/Roaming) didn't exist before WinNT 6.x (Vista)
     add_definitions(-D_WIN32_WINNT=0x0600 -DWINVER=0x0600)
     set(PLATFORM_LIBRARIES winmm ws2_32)
-    IF (MINGW)
+    if (MINGW)
         # PSAPI is the Process Status API
         set(PLATFORM_LIBRARIES ${PLATFORM_LIBRARIES} psapi imm32 version)
-    ENDIF (MINGW)
-ELSEIF (CMAKE_SYSTEM_NAME MATCHES "^(Linux|kFreeBSD|GNU|SunOS)$")
+    endif()
+elseif (CMAKE_SYSTEM_NAME MATCHES "^(Linux|kFreeBSD|GNU|SunOS)$")
     set(PLATFORM_LIBRARIES rt)
-ENDIF (APPLE)
+endif()

 # Setup a custom clang-format target (if clang-format can be found) that will run
 # against all the src files. This should be used before making a pull request.
@@ -1,18 +1,79 @@
 # Enable modules to include each other's files
 include_directories(.)

+# CMake seems to only define _DEBUG on Windows
+set_property(DIRECTORY APPEND PROPERTY
+    COMPILE_DEFINITIONS $<$<CONFIG:Debug>:_DEBUG> $<$<NOT:$<CONFIG:Debug>>:NDEBUG>)
+
+# Set compilation flags
+if (MSVC)
+    set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE STRING "" FORCE)
+
+    # Silence "deprecation" warnings
+    add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)
+
+    # Avoid windows.h junk
+    add_definitions(-DNOMINMAX)
+
+    # Avoid windows.h from including some usually unused libs like winsocks.h, since this might cause some redefinition errors.
+    add_definitions(-DWIN32_LEAN_AND_MEAN)
+
+    # /W3 - Level 3 warnings
+    # /MP - Multi-threaded compilation
+    # /Zi - Output debugging information
+    # /Zo - enhanced debug info for optimized builds
+    # /permissive- - enables stricter C++ standards conformance checks
+    # /EHsc - C++-only exception handling semantics
+    # /Zc:throwingNew - let codegen assume `operator new` will never return null
+    # /Zc:inline - let codegen omit inline functions in object files
+    add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline)
+
+    # /GS- - No stack buffer overflow checks
+    add_compile_options("$<$<CONFIG:Release>:/GS->")
+
+    set(CMAKE_EXE_LINKER_FLAGS_DEBUG "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
+    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
+else()
+    add_compile_options("-Wno-attributes")
+
+    if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
+        add_compile_options("-stdlib=libc++")
+    endif()
+
+    # Set file offset size to 64 bits.
+    #
+    # On modern Unixes, this is typically already the case. The lone exception is
+    # glibc, which may default to 32 bits. glibc allows this to be configured
+    # by setting _FILE_OFFSET_BITS.
+    if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR MINGW)
+        add_definitions(-D_FILE_OFFSET_BITS=64)
+    endif()
+
+    if (MINGW)
+        add_definitions(-DMINGW_HAS_SECURE_API)
+
+        if (MINGW_STATIC_BUILD)
+            add_definitions(-DQT_STATICPLUGIN)
+            add_compile_options("-static")
+        endif()
+    endif()
+endif()
+
 add_subdirectory(common)
 add_subdirectory(core)
 add_subdirectory(audio_core)
 add_subdirectory(video_core)
 add_subdirectory(input_common)
 add_subdirectory(tests)

 if (ENABLE_SDL2)
     add_subdirectory(yuzu_cmd)
 endif()

 if (ENABLE_QT)
     add_subdirectory(yuzu)
 endif()

 if (ENABLE_WEB_SERVICE)
     add_subdirectory(web_service)
 endif()
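For context, a minimal sketch (assuming a glibc target) of what -D_FILE_OFFSET_BITS=64 buys: with the define in effect, off_t is 64 bits even on 32-bit glibc systems, so files larger than 2 GiB can be stat'ed and seeked without truncation.

#include <sys/types.h>

static_assert(sizeof(off_t) == 8,
              "off_t should be 64 bits wide when _FILE_OFFSET_BITS=64 is defined");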
@@ -38,7 +38,7 @@ Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format fo
       sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} {

     release_event = core_timing.RegisterEvent(
-        name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); });
+        name, [this](u64 userdata, s64 cycles_late) { ReleaseActiveBuffer(); });
 }

 void Stream::Play() {
@@ -98,6 +98,7 @@ add_library(common STATIC
     microprofile.h
     microprofileui.h
     misc.cpp
+    multi_level_queue.h
     page_table.cpp
     page_table.h
     param_package.cpp
@@ -58,4 +58,43 @@ inline u64 CountLeadingZeroes64(u64 value) {
     return __builtin_clzll(value);
 }
 #endif
+
+#ifdef _MSC_VER
+inline u32 CountTrailingZeroes32(u32 value) {
+    unsigned long trailing_zero = 0;
+
+    if (_BitScanForward(&trailing_zero, value) != 0) {
+        return trailing_zero;
+    }
+
+    return 32;
+}
+
+inline u64 CountTrailingZeroes64(u64 value) {
+    unsigned long trailing_zero = 0;
+
+    if (_BitScanForward64(&trailing_zero, value) != 0) {
+        return trailing_zero;
+    }
+
+    return 64;
+}
+#else
+inline u32 CountTrailingZeroes32(u32 value) {
+    if (value == 0) {
+        return 32;
+    }
+
+    return __builtin_ctz(value);
+}
+
+inline u64 CountTrailingZeroes64(u64 value) {
+    if (value == 0) {
+        return 64;
+    }
+
+    return __builtin_ctzll(value);
+}
+#endif

 } // namespace Common
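For context, the convention these helpers establish: a zero input yields the full bit width instead of undefined behavior (GCC/Clang's __builtin_ctz family is undefined for 0, while MSVC's _BitScanForward signals failure through its return value). A small usage sketch:

#include <cassert>
#include "common/bit_util.h"

void TrailingZeroExamples() {
    assert(Common::CountTrailingZeroes32(0b1000u) == 3); // lowest set bit is bit 3
    assert(Common::CountTrailingZeroes32(1u) == 0);
    assert(Common::CountTrailingZeroes32(0u) == 32);     // zero maps to the bit width
    assert(Common::CountTrailingZeroes64(0ULL) == 64);
}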
@@ -40,10 +40,9 @@ using s64 = std::int64_t; ///< 64-bit signed int
 using f32 = float; ///< 32-bit floating point
 using f64 = double; ///< 64-bit floating point

-// TODO: It would be nice to eventually replace these with strong types that prevent accidental
-// conversion between each other.
-using VAddr = u64; ///< Represents a pointer in the userspace virtual address space.
-using PAddr = u64; ///< Represents a pointer in the ARM11 physical address space.
+using VAddr = u64;    ///< Represents a pointer in the userspace virtual address space.
+using PAddr = u64;    ///< Represents a pointer in the ARM11 physical address space.
+using GPUVAddr = u64; ///< Represents a pointer in the GPU virtual address space.

 using u128 = std::array<std::uint64_t, 2>;
 static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");
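The dropped TODO referred to strong address types; a hypothetical sketch (not part of this change) of what such a wrapper could look like:

// Purely illustrative: tag types make the aliases mutually unconvertible.
template <typename Tag>
struct StrongAddr {
    explicit constexpr StrongAddr(u64 v) : value{v} {}
    u64 value{};
};
// using VAddr = StrongAddr<struct VAddrTag>; // would reject: VAddr v = some_paddr;
// using PAddr = StrongAddr<struct PAddrTag>;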
src/common/multi_level_queue.h (new file, 337 lines)
@@ -0,0 +1,337 @@
// Copyright 2019 TuxSH
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <iterator>
#include <list>
#include <utility>

#include "common/bit_util.h"
#include "common/common_types.h"

namespace Common {

/**
 * A MultiLevelQueue is a type of priority queue which has the following characteristics:
 * - iterable through each of its elements.
 * - back can be obtained.
 * - O(1) add, lookup (both front and back)
 * - discrete priorities and a max of 64 priorities (limited domain)
 * This type of priority queue is normally used for managing threads within a scheduler
 */
template <typename T, std::size_t Depth>
class MultiLevelQueue {
public:
    using value_type = T;
    using reference = value_type&;
    using const_reference = const value_type&;
    using pointer = value_type*;
    using const_pointer = const value_type*;

    using difference_type = typename std::pointer_traits<pointer>::difference_type;
    using size_type = std::size_t;

    template <bool is_constant>
    class iterator_impl {
    public:
        using iterator_category = std::bidirectional_iterator_tag;
        using value_type = T;
        using pointer = std::conditional_t<is_constant, const T*, T*>;
        using reference = std::conditional_t<is_constant, const T&, T&>;
        using difference_type = typename std::pointer_traits<pointer>::difference_type;

        friend bool operator==(const iterator_impl& lhs, const iterator_impl& rhs) {
            if (lhs.IsEnd() && rhs.IsEnd())
                return true;
            return std::tie(lhs.current_priority, lhs.it) == std::tie(rhs.current_priority, rhs.it);
        }

        friend bool operator!=(const iterator_impl& lhs, const iterator_impl& rhs) {
            return !operator==(lhs, rhs);
        }

        reference operator*() const {
            return *it;
        }

        pointer operator->() const {
            return it.operator->();
        }

        iterator_impl& operator++() {
            if (IsEnd()) {
                return *this;
            }

            ++it;

            if (it == GetEndItForPrio()) {
                u64 prios = mlq.used_priorities;
                prios &= ~((1ULL << (current_priority + 1)) - 1);
                if (prios == 0) {
                    current_priority = mlq.depth();
                } else {
                    current_priority = CountTrailingZeroes64(prios);
                    it = GetBeginItForPrio();
                }
            }
            return *this;
        }

        iterator_impl& operator--() {
            if (IsEnd()) {
                if (mlq.used_priorities != 0) {
                    current_priority = 63 - CountLeadingZeroes64(mlq.used_priorities);
                    it = GetEndItForPrio();
                    --it;
                }
            } else if (it == GetBeginItForPrio()) {
                u64 prios = mlq.used_priorities;
                prios &= (1ULL << current_priority) - 1;
                if (prios != 0) {
                    current_priority = CountTrailingZeroes64(prios);
                    it = GetEndItForPrio();
                    --it;
                }
            } else {
                --it;
            }
            return *this;
        }

        iterator_impl operator++(int) {
            const iterator_impl v{*this};
            ++(*this);
            return v;
        }

        iterator_impl operator--(int) {
            const iterator_impl v{*this};
            --(*this);
            return v;
        }

        // allow implicit const->non-const
        iterator_impl(const iterator_impl<false>& other)
            : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}

        iterator_impl(const iterator_impl<true>& other)
            : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}

        iterator_impl& operator=(const iterator_impl<false>& other) {
            mlq = other.mlq;
            it = other.it;
            current_priority = other.current_priority;
            return *this;
        }

        friend class iterator_impl<true>;
        iterator_impl() = default;

    private:
        friend class MultiLevelQueue;
        using container_ref =
            std::conditional_t<is_constant, const MultiLevelQueue&, MultiLevelQueue&>;
        using list_iterator = std::conditional_t<is_constant, typename std::list<T>::const_iterator,
                                                 typename std::list<T>::iterator>;

        explicit iterator_impl(container_ref mlq, list_iterator it, u32 current_priority)
            : mlq(mlq), it(it), current_priority(current_priority) {}
        explicit iterator_impl(container_ref mlq, u32 current_priority)
            : mlq(mlq), it(), current_priority(current_priority) {}

        bool IsEnd() const {
            return current_priority == mlq.depth();
        }

        list_iterator GetBeginItForPrio() const {
            return mlq.levels[current_priority].begin();
        }

        list_iterator GetEndItForPrio() const {
            return mlq.levels[current_priority].end();
        }

        container_ref mlq;
        list_iterator it;
        u32 current_priority;
    };

    using iterator = iterator_impl<false>;
    using const_iterator = iterator_impl<true>;

    void add(const T& element, u32 priority, bool send_back = true) {
        if (send_back)
            levels[priority].push_back(element);
        else
            levels[priority].push_front(element);
        used_priorities |= 1ULL << priority;
    }

    void remove(const T& element, u32 priority) {
        auto it = ListIterateTo(levels[priority], element);
        if (it == levels[priority].end())
            return;
        levels[priority].erase(it);
        if (levels[priority].empty()) {
            used_priorities &= ~(1ULL << priority);
        }
    }

    void adjust(const T& element, u32 old_priority, u32 new_priority, bool adjust_front = false) {
        remove(element, old_priority);
        add(element, new_priority, !adjust_front);
    }
    void adjust(const_iterator it, u32 old_priority, u32 new_priority, bool adjust_front = false) {
        adjust(*it, old_priority, new_priority, adjust_front);
    }

    void transfer_to_front(const T& element, u32 priority, MultiLevelQueue& other) {
        ListSplice(other.levels[priority], other.levels[priority].begin(), levels[priority],
                   ListIterateTo(levels[priority], element));

        other.used_priorities |= 1ULL << priority;

        if (levels[priority].empty()) {
            used_priorities &= ~(1ULL << priority);
        }
    }

    void transfer_to_front(const_iterator it, u32 priority, MultiLevelQueue& other) {
        transfer_to_front(*it, priority, other);
    }

    void transfer_to_back(const T& element, u32 priority, MultiLevelQueue& other) {
        ListSplice(other.levels[priority], other.levels[priority].end(), levels[priority],
                   ListIterateTo(levels[priority], element));

        other.used_priorities |= 1ULL << priority;

        if (levels[priority].empty()) {
            used_priorities &= ~(1ULL << priority);
        }
    }

    void transfer_to_back(const_iterator it, u32 priority, MultiLevelQueue& other) {
        transfer_to_back(*it, priority, other);
    }

    void yield(u32 priority, std::size_t n = 1) {
        ListShiftForward(levels[priority], n);
    }

    std::size_t depth() const {
        return Depth;
    }

    std::size_t size(u32 priority) const {
        return levels[priority].size();
    }

    std::size_t size() const {
        u64 priorities = used_priorities;
        std::size_t size = 0;
        while (priorities != 0) {
            const u64 current_priority = CountTrailingZeroes64(priorities);
            size += levels[current_priority].size();
            priorities &= ~(1ULL << current_priority);
        }
        return size;
    }

    bool empty() const {
        return used_priorities == 0;
    }

    bool empty(u32 priority) const {
        return (used_priorities & (1ULL << priority)) == 0;
    }

    u32 highest_priority_set(u32 max_priority = 0) const {
        const u64 priorities =
            max_priority == 0 ? used_priorities : (used_priorities & ~((1ULL << max_priority) - 1));
        return priorities == 0 ? Depth : static_cast<u32>(CountTrailingZeroes64(priorities));
    }

    u32 lowest_priority_set(u32 min_priority = Depth - 1) const {
        const u64 priorities = min_priority >= Depth - 1
                                   ? used_priorities
                                   : (used_priorities & ((1ULL << (min_priority + 1)) - 1));
        return priorities == 0 ? Depth : 63 - CountLeadingZeroes64(priorities);
    }

    const_iterator cbegin(u32 max_prio = 0) const {
        const u32 priority = highest_priority_set(max_prio);
        return priority == Depth ? cend()
                                 : const_iterator{*this, levels[priority].cbegin(), priority};
    }
    const_iterator begin(u32 max_prio = 0) const {
        return cbegin(max_prio);
    }
    iterator begin(u32 max_prio = 0) {
        const u32 priority = highest_priority_set(max_prio);
        return priority == Depth ? end() : iterator{*this, levels[priority].begin(), priority};
    }

    const_iterator cend(u32 min_prio = Depth - 1) const {
        return min_prio == Depth - 1 ? const_iterator{*this, Depth} : cbegin(min_prio + 1);
    }
    const_iterator end(u32 min_prio = Depth - 1) const {
        return cend(min_prio);
    }
    iterator end(u32 min_prio = Depth - 1) {
        return min_prio == Depth - 1 ? iterator{*this, Depth} : begin(min_prio + 1);
    }

    T& front(u32 max_priority = 0) {
        const u32 priority = highest_priority_set(max_priority);
        return levels[priority == Depth ? 0 : priority].front();
    }
    const T& front(u32 max_priority = 0) const {
        const u32 priority = highest_priority_set(max_priority);
        return levels[priority == Depth ? 0 : priority].front();
    }

    T back(u32 min_priority = Depth - 1) {
        const u32 priority = lowest_priority_set(min_priority); // intended
        return levels[priority == Depth ? 63 : priority].back();
    }
    const T& back(u32 min_priority = Depth - 1) const {
        const u32 priority = lowest_priority_set(min_priority); // intended
        return levels[priority == Depth ? 63 : priority].back();
    }

private:
    using const_list_iterator = typename std::list<T>::const_iterator;

    static void ListShiftForward(std::list<T>& list, const std::size_t shift = 1) {
        if (shift >= list.size()) {
            return;
        }

        const auto begin_range = list.begin();
        const auto end_range = std::next(begin_range, shift);
        list.splice(list.end(), list, begin_range, end_range);
    }

    static void ListSplice(std::list<T>& in_list, const_list_iterator position,
                           std::list<T>& out_list, const_list_iterator element) {
        in_list.splice(position, out_list, element);
    }

    static const_list_iterator ListIterateTo(const std::list<T>& list, const T& element) {
        auto it = list.cbegin();
        while (it != list.cend() && *it != element) {
            ++it;
        }
        return it;
    }

    std::array<std::list<T>, Depth> levels;
    u64 used_priorities = 0;
};

} // namespace Common
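A brief usage sketch of the container above, mirroring how a scheduler consumes it — one std::list per discrete priority, with used_priorities acting as a 64-bit occupancy bitmap so front lookup stays O(1):

#include <cassert>
#include "common/multi_level_queue.h"

void MultiLevelQueueExample() {
    Common::MultiLevelQueue<int, 64> queue; // 64 priority levels, 0 = highest

    queue.add(10, 3);      // push_back at priority 3
    queue.add(20, 3);
    queue.add(30, 1);      // higher priority than the two above

    assert(queue.highest_priority_set() == 1); // lowest set bit of used_priorities
    assert(queue.front() == 30);               // iteration starts at priority 1

    queue.yield(3);        // rotate level 3: [10, 20] -> [20, 10]
    queue.remove(30, 1);   // level 1 empties, so its bitmap bit is cleared
    assert(queue.front() == 20);
}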
@@ -16,6 +16,7 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
     pointers.resize(num_page_table_entries);
     attributes.resize(num_page_table_entries);
+    backing_addr.resize(num_page_table_entries);

     // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
     // vector size is subsequently decreased (via resize), the vector might not automatically
@@ -24,6 +25,7 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
     pointers.shrink_to_fit();
     attributes.shrink_to_fit();
+    backing_addr.shrink_to_fit();
 }

 } // namespace Common
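The comment in this hunk leans on a std::vector property worth spelling out: shrinking with resize() does not release capacity, and shrink_to_fit() is the explicit (non-binding) request that does. A standalone illustration:

#include <cstdio>
#include <vector>

int main() {
    std::vector<char> v(1ULL << 30); // ~1 GiB, like a 39-bit address space's table
    v.resize(1ULL << 20);            // size shrinks; capacity typically stays ~1 GiB
    std::printf("size=%zu capacity=%zu\n", v.size(), v.capacity());
    v.shrink_to_fit();               // non-binding request to return the excess memory
    std::printf("size=%zu capacity=%zu\n", v.size(), v.capacity());
}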
@@ -21,6 +21,8 @@ enum class PageType : u8 {
     RasterizerCachedMemory,
     /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
     Special,
+    /// Page is allocated for use.
+    Allocated,
 };

 struct SpecialRegion {
@@ -66,7 +68,7 @@ struct PageTable {
     /**
      * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
      * of type `Special`.
      */
-    boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
+    boost::icl::interval_map<u64, std::set<SpecialRegion>> special_regions;

    /**
     * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
@@ -74,6 +76,8 @@ struct PageTable {
     */
    std::vector<PageType> attributes;

+    std::vector<u64> backing_addr;
+
    const std::size_t page_size_in_bits{};
};
@@ -70,6 +70,8 @@ add_library(core STATIC
     file_sys/system_archive/ng_word.h
     file_sys/system_archive/system_archive.cpp
     file_sys/system_archive/system_archive.h
+    file_sys/system_archive/system_version.cpp
+    file_sys/system_archive/system_version.h
     file_sys/vfs.cpp
     file_sys/vfs.h
     file_sys/vfs_concat.cpp
@@ -144,6 +146,8 @@ add_library(core STATIC
     hle/kernel/svc_wrap.h
     hle/kernel/thread.cpp
     hle/kernel/thread.h
+    hle/kernel/transfer_memory.cpp
+    hle/kernel/transfer_memory.h
     hle/kernel/vm_manager.cpp
     hle/kernel/vm_manager.h
     hle/kernel/wait_object.cpp
@@ -460,8 +460,8 @@ Tegra::DebugContext* System::GetGPUDebugContext() const {
 void System::RegisterCheatList(const std::vector<FileSys::CheatList>& list,
                                const std::string& build_id, VAddr code_region_start,
                                VAddr code_region_end) {
-    impl->cheat_engine =
-        std::make_unique<FileSys::CheatEngine>(list, build_id, code_region_start, code_region_end);
+    impl->cheat_engine = std::make_unique<FileSys::CheatEngine>(*this, list, build_id,
+                                                                code_region_start, code_region_end);
 }

 void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) {
@@ -186,7 +186,7 @@ void CoreTiming::Advance() {
         Event evt = std::move(event_queue.front());
         std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
         event_queue.pop_back();
-        evt.type->callback(evt.userdata, static_cast<int>(global_timer - evt.time));
+        evt.type->callback(evt.userdata, global_timer - evt.time);
     }

     is_global_timer_sane = false;
@@ -15,7 +15,7 @@
 namespace Core::Timing {

 /// A callback that may be scheduled for a particular core timing event.
-using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
+using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;

 /// Contains the characteristics of a particular event.
 struct EventType {
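The switch to s64 matters because Advance() now passes `global_timer - evt.time` through untruncated; the old static_cast<int> could wrap for large tick deltas. A sketch of a self-rescheduling event against the new callback shape — the class and its period are illustrative, while RegisterEvent/ScheduleEvent are the calls shown in the hunks above:

class PeriodicTicker {
public:
    explicit PeriodicTicker(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
        event = core_timing.RegisterEvent(
            "PeriodicTicker", [this](u64 userdata, s64 cycles_late) { Tick(cycles_late); });
        core_timing.ScheduleEvent(TICKS, event);
    }

private:
    void Tick(s64 cycles_late) {
        // Subtract the lateness when re-scheduling so the period does not drift.
        core_timing.ScheduleEvent(TICKS - cycles_late, event);
    }

    static constexpr s64 TICKS = 19'200'000; // illustrative period, not from the diff
    Core::Timing::CoreTiming& core_timing;
    Core::Timing::EventType* event = nullptr;
};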
@@ -11,14 +11,13 @@
 #include "core/core_timing_util.h"
 #include "core/file_sys/cheat_engine.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/service/hid/controllers/controller_base.h"
 #include "core/hle/service/hid/controllers/npad.h"
 #include "core/hle/service/hid/hid.h"
 #include "core/hle/service/sm/sm.h"

 namespace FileSys {

-constexpr u64 CHEAT_ENGINE_TICKS = Core::Timing::BASE_CLOCK_RATE / 60;
+constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
 constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;

 u64 Cheat::Address() const {
@@ -77,8 +76,8 @@ void CheatList::Execute() {
     }
 }

-CheatList::CheatList(ProgramSegment master, ProgramSegment standard)
-    : master_list(master), standard_list(standard) {}
+CheatList::CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard)
+    : master_list{std::move(master)}, standard_list{std::move(standard)}, system{&system_} {}

 bool CheatList::EvaluateConditional(const Cheat& cheat) const {
     using ComparisonFunction = bool (*)(u64, u64);
@@ -89,10 +88,8 @@ bool CheatList::EvaluateConditional(const Cheat& cheat) const {
     };

     if (cheat.type == CodeType::ConditionalInput) {
-        const auto applet_resource = Core::System::GetInstance()
-                                         .ServiceManager()
-                                         .GetService<Service::HID::Hid>("hid")
-                                         ->GetAppletResource();
+        const auto applet_resource =
+            system->ServiceManager().GetService<Service::HID::Hid>("hid")->GetAppletResource();
         if (applet_resource == nullptr) {
             LOG_WARNING(
                 Common_Filesystem,
@@ -188,8 +185,9 @@ void CheatList::Loop(const Cheat& cheat) {
     ASSERT(iter != block_pairs.end());
     ASSERT(iter->first < iter->second);

-    for (int i = cheat.Value(4, 4); i >= 0; --i) {
-        register_3 = i;
+    const s32 initial_value = static_cast<s32>(cheat.Value(4, sizeof(s32)));
+    for (s32 i = initial_value; i >= 0; --i) {
+        register_3 = static_cast<u64>(i);
         for (std::size_t c = iter->first + 1; c < iter->second; ++c) {
             current_index = c;
             ExecuteSingleCheat(
@@ -320,14 +318,14 @@ void CheatList::ExecuteBlock(const Block& block) {

 CheatParser::~CheatParser() = default;

-CheatList CheatParser::MakeCheatList(CheatList::ProgramSegment master,
+CheatList CheatParser::MakeCheatList(const Core::System& system, CheatList::ProgramSegment master,
                                      CheatList::ProgramSegment standard) const {
-    return {master, standard};
+    return {system, std::move(master), std::move(standard)};
 }

 TextCheatParser::~TextCheatParser() = default;

-CheatList TextCheatParser::Parse(const std::vector<u8>& data) const {
+CheatList TextCheatParser::Parse(const Core::System& system, const std::vector<u8>& data) const {
     std::stringstream ss;
     ss.write(reinterpret_cast<const char*>(data.data()), data.size());

@@ -375,7 +373,7 @@ CheatList TextCheatParser::Parse(const std::vector<u8>& data) const {
         }
     }

-    return MakeCheatList(master_list, standard_list);
+    return MakeCheatList(system, master_list, standard_list);
 }

 std::array<u8, 16> TextCheatParser::ParseSingleLineCheat(const std::string& line) const {
@@ -425,6 +423,7 @@ std::array<u8, 16> TextCheatParser::ParseSingleLineCheat(const std::string& line
     return out;
 }

+namespace {
 u64 MemoryReadImpl(u32 width, VAddr addr) {
     switch (width) {
     case 1:
@@ -459,17 +458,18 @@ void MemoryWriteImpl(u32 width, VAddr addr, u64 value) {
         UNREACHABLE();
     }
 }
+} // Anonymous namespace

-CheatEngine::CheatEngine(std::vector<CheatList> cheats, const std::string& build_id,
-                         VAddr code_region_start, VAddr code_region_end)
-    : cheats(std::move(cheats)) {
-    auto& core_timing{Core::System::GetInstance().CoreTiming()};
+CheatEngine::CheatEngine(Core::System& system, std::vector<CheatList> cheats_,
+                         const std::string& build_id, VAddr code_region_start,
+                         VAddr code_region_end)
+    : cheats{std::move(cheats_)}, core_timing{system.CoreTiming()} {
     event = core_timing.RegisterEvent(
         "CheatEngine::FrameCallback::" + build_id,
         [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); });
     core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event);

-    const auto& vm_manager = Core::System::GetInstance().CurrentProcess()->VMManager();
+    const auto& vm_manager = system.CurrentProcess()->VMManager();
     for (auto& list : this->cheats) {
         list.SetMemoryParameters(code_region_start, vm_manager.GetHeapRegionBaseAddress(),
                                  code_region_end, vm_manager.GetHeapRegionEndAddress(),
@@ -478,15 +478,14 @@ CheatEngine::CheatEngine(std::vector<CheatList> cheats, const std::string& build
 }

 CheatEngine::~CheatEngine() {
-    auto& core_timing{Core::System::GetInstance().CoreTiming()};
     core_timing.UnscheduleEvent(event, 0);
 }

-void CheatEngine::FrameCallback(u64 userdata, int cycles_late) {
-    for (auto& list : cheats)
+void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) {
+    for (auto& list : cheats) {
         list.Execute();
+    }

-    auto& core_timing{Core::System::GetInstance().CoreTiming()};
     core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - cycles_late, event);
 }
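The Loop() change replaces an implicit u64-to-int conversion with an explicit, width-checked one. The pitfall in isolation:

#include <cstdint>

void NarrowingExample() {
    const std::uint64_t value = 0x1'0000'0005ULL; // low 32 bits are 5
    // int i = value;  // old pattern: silently keeps only the low bits, and the
    //                 // signed result is implementation-defined before C++20
    const std::int32_t i = static_cast<std::int32_t>(value); // intent is visible
    static_cast<void>(i);
}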
@@ -7,14 +7,18 @@
 #include <map>
 #include <set>
 #include <vector>
+#include <queue>
 #include "common/bit_field.h"
 #include "common/common_types.h"

-namespace Core::Timing {
-struct EventType;
-}
+namespace Core {
+class System;
+}
+
+namespace Core::Timing {
+class CoreTiming;
+struct EventType;
+} // namespace Core::Timing

 namespace FileSys {

 enum class CodeType : u32 {
@@ -133,7 +137,7 @@ public:
     void Execute();

 private:
-    CheatList(ProgramSegment master, ProgramSegment standard);
+    CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard);

     void ProcessBlockPairs(const Block& block);
     void ExecuteSingleCheat(const Cheat& cheat);
@@ -183,6 +187,8 @@ private:
     std::map<u64, u64> block_pairs;

     std::set<u64> encountered_loops;
+
+    const Core::System* system;
 };

 // Intermediary class that parses a text file or other disk format for storing cheats into a
@@ -191,10 +197,10 @@ class CheatParser {
 public:
     virtual ~CheatParser();

-    virtual CheatList Parse(const std::vector<u8>& data) const = 0;
+    virtual CheatList Parse(const Core::System& system, const std::vector<u8>& data) const = 0;

 protected:
-    CheatList MakeCheatList(CheatList::ProgramSegment master,
+    CheatList MakeCheatList(const Core::System& system_, CheatList::ProgramSegment master,
                             CheatList::ProgramSegment standard) const;
 };

@@ -203,7 +209,7 @@ class TextCheatParser final : public CheatParser {
 public:
     ~TextCheatParser() override;

-    CheatList Parse(const std::vector<u8>& data) const override;
+    CheatList Parse(const Core::System& system, const std::vector<u8>& data) const override;

 private:
     std::array<u8, 16> ParseSingleLineCheat(const std::string& line) const;
@@ -212,16 +218,17 @@ private:
 // Class that encapsulates a CheatList and manages its interaction with memory and CoreTiming
 class CheatEngine final {
 public:
-    CheatEngine(std::vector<CheatList> cheats, const std::string& build_id, VAddr code_region_start,
-                VAddr code_region_end);
+    CheatEngine(Core::System& system_, std::vector<CheatList> cheats_, const std::string& build_id,
+                VAddr code_region_start, VAddr code_region_end);
     ~CheatEngine();

 private:
-    void FrameCallback(u64 userdata, int cycles_late);
-
-    Core::Timing::EventType* event;
+    void FrameCallback(u64 userdata, s64 cycles_late);

     std::vector<CheatList> cheats;
+
+    Core::Timing::EventType* event;
+    Core::Timing::CoreTiming& core_timing;
 };

 } // namespace FileSys
@@ -11,6 +11,9 @@ namespace FileSys {
 constexpr ResultCode ERROR_PATH_NOT_FOUND{ErrorModule::FS, 1};
+constexpr ResultCode ERROR_ENTITY_NOT_FOUND{ErrorModule::FS, 1002};
+constexpr ResultCode ERROR_SD_CARD_NOT_FOUND{ErrorModule::FS, 2001};
+constexpr ResultCode ERROR_OUT_OF_BOUNDS{ErrorModule::FS, 3005};
 constexpr ResultCode ERROR_FAILED_MOUNT_ARCHIVE{ErrorModule::FS, 3223};
 constexpr ResultCode ERROR_INVALID_ARGUMENT{ErrorModule::FS, 6001};
 constexpr ResultCode ERROR_INVALID_OFFSET{ErrorModule::FS, 6061};
 constexpr ResultCode ERROR_INVALID_SIZE{ErrorModule::FS, 6062};
@@ -20,6 +20,7 @@
 #include "core/file_sys/vfs_vector.h"
 #include "core/hle/service/filesystem/filesystem.h"
 #include "core/loader/loader.h"
+#include "core/loader/nso.h"
 #include "core/settings.h"

 namespace FileSys {
@@ -32,14 +33,6 @@ constexpr std::array<const char*, 14> EXEFS_FILE_NAMES{
     "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7", "subsdk8", "subsdk9",
 };

-struct NSOBuildHeader {
-    u32_le magic;
-    INSERT_PADDING_BYTES(0x3C);
-    std::array<u8, 0x20> build_id;
-    INSERT_PADDING_BYTES(0xA0);
-};
-static_assert(sizeof(NSOBuildHeader) == 0x100, "NSOBuildHeader has incorrect size.");
-
 std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
     std::array<u8, sizeof(u32)> bytes{};
     bytes[0] = version % SINGLE_BYTE_MODULUS;
@@ -163,14 +156,16 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
 }

 std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const {
-    if (nso.size() < 0x100)
+    if (nso.size() < sizeof(Loader::NSOHeader)) {
         return nso;
+    }

-    NSOBuildHeader header;
-    std::memcpy(&header, nso.data(), sizeof(NSOBuildHeader));
+    Loader::NSOHeader header;
+    std::memcpy(&header, nso.data(), sizeof(header));

-    if (header.magic != Common::MakeMagic('N', 'S', 'O', '0'))
+    if (header.magic != Common::MakeMagic('N', 'S', 'O', '0')) {
         return nso;
+    }

     const auto build_id_raw = Common::HexArrayToString(header.build_id);
     const auto build_id = build_id_raw.substr(0, build_id_raw.find_last_not_of('0') + 1);
@@ -213,9 +208,11 @@ std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const {
         }
     }

-    if (out.size() < 0x100)
+    if (out.size() < sizeof(Loader::NSOHeader)) {
         return nso;
-    std::memcpy(out.data(), &header, sizeof(NSOBuildHeader));
+    }
+
+    std::memcpy(out.data(), &header, sizeof(header));
     return out;
 }

@@ -233,7 +230,7 @@ bool PatchManager::HasNSOPatch(const std::array<u8, 32>& build_id_) const {
     return !CollectPatches(patch_dirs, build_id).empty();
 }

-static std::optional<CheatList> ReadCheatFileFromFolder(u64 title_id,
+static std::optional<CheatList> ReadCheatFileFromFolder(const Core::System& system, u64 title_id,
                                                         const std::array<u8, 0x20>& build_id_,
                                                         const VirtualDir& base_path, bool upper) {
     const auto build_id_raw = Common::HexArrayToString(build_id_, upper);
@@ -254,28 +251,28 @@ static std::optional<CheatList> ReadCheatFileFromFolder(u64 title_id,
     }

     TextCheatParser parser;
-    return parser.Parse(data);
+    return parser.Parse(system, data);
 }

-std::vector<CheatList> PatchManager::CreateCheatList(const std::array<u8, 32>& build_id_) const {
-    std::vector<CheatList> out;
-
+std::vector<CheatList> PatchManager::CreateCheatList(const Core::System& system,
+                                                     const std::array<u8, 32>& build_id_) const {
     const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
     auto patch_dirs = load_dir->GetSubdirectories();
     std::sort(patch_dirs.begin(), patch_dirs.end(),
               [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });

+    std::vector<CheatList> out;
     out.reserve(patch_dirs.size());
     for (const auto& subdir : patch_dirs) {
         auto cheats_dir = subdir->GetSubdirectory("cheats");
         if (cheats_dir != nullptr) {
-            auto res = ReadCheatFileFromFolder(title_id, build_id_, cheats_dir, true);
+            auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
             if (res.has_value()) {
                 out.push_back(std::move(*res));
                 continue;
             }

-            res = ReadCheatFileFromFolder(title_id, build_id_, cheats_dir, false);
+            res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, false);
             if (res.has_value())
                 out.push_back(std::move(*res));
     }
@@ -12,6 +12,10 @@
 #include "core/file_sys/nca_metadata.h"
 #include "core/file_sys/vfs.h"

+namespace Core {
+class System;
+}
+
 namespace FileSys {

 class NCA;
@@ -47,7 +51,8 @@ public:
     bool HasNSOPatch(const std::array<u8, 0x20>& build_id) const;

     // Creates a CheatList object with all
-    std::vector<CheatList> CreateCheatList(const std::array<u8, 0x20>& build_id) const;
+    std::vector<CheatList> CreateCheatList(const Core::System& system,
+                                           const std::array<u8, 0x20>& build_id) const;

     // Currently tracked RomFS patches:
     // - Game Updates
@@ -6,6 +6,7 @@
 #include "core/file_sys/romfs.h"
 #include "core/file_sys/system_archive/ng_word.h"
 #include "core/file_sys/system_archive/system_archive.h"
+#include "core/file_sys/system_archive/system_version.h"

 namespace FileSys::SystemArchive {

@@ -30,7 +31,7 @@ constexpr std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHI
     {0x0100000000000806, "NgWord", &NgWord1},
     {0x0100000000000807, "SsidList", nullptr},
     {0x0100000000000808, "Dictionary", nullptr},
-    {0x0100000000000809, "SystemVersion", nullptr},
+    {0x0100000000000809, "SystemVersion", &SystemVersion},
     {0x010000000000080A, "AvatarImage", nullptr},
     {0x010000000000080B, "LocalNews", nullptr},
     {0x010000000000080C, "Eula", nullptr},
src/core/file_sys/system_archive/system_version.cpp (new file, 52 lines)
@@ -0,0 +1,52 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "core/file_sys/system_archive/system_version.h"
#include "core/file_sys/vfs_vector.h"

namespace FileSys::SystemArchive {

namespace SystemVersionData {

// This section should reflect the best system version to describe yuzu's HLE api.
// TODO(DarkLordZach): Update when HLE gets better.

constexpr u8 VERSION_MAJOR = 5;
constexpr u8 VERSION_MINOR = 1;
constexpr u8 VERSION_MICRO = 0;

constexpr u8 REVISION_MAJOR = 3;
constexpr u8 REVISION_MINOR = 0;

constexpr char PLATFORM_STRING[] = "NX";
constexpr char VERSION_HASH[] = "23f9df53e25709d756e0c76effcb2473bd3447dd";
constexpr char DISPLAY_VERSION[] = "5.1.0";
constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 5.1.0-3.0";

} // namespace SystemVersionData

std::string GetLongDisplayVersion() {
    return SystemVersionData::DISPLAY_TITLE;
}

VirtualDir SystemVersion() {
    VirtualFile file = std::make_shared<VectorVfsFile>(std::vector<u8>(0x100), "file");
    file->WriteObject(SystemVersionData::VERSION_MAJOR, 0);
    file->WriteObject(SystemVersionData::VERSION_MINOR, 1);
    file->WriteObject(SystemVersionData::VERSION_MICRO, 2);
    file->WriteObject(SystemVersionData::REVISION_MAJOR, 4);
    file->WriteObject(SystemVersionData::REVISION_MINOR, 5);
    file->WriteArray(SystemVersionData::PLATFORM_STRING,
                     std::min<u64>(sizeof(SystemVersionData::PLATFORM_STRING), 0x20ULL), 0x8);
    file->WriteArray(SystemVersionData::VERSION_HASH,
                     std::min<u64>(sizeof(SystemVersionData::VERSION_HASH), 0x40ULL), 0x28);
    file->WriteArray(SystemVersionData::DISPLAY_VERSION,
                     std::min<u64>(sizeof(SystemVersionData::DISPLAY_VERSION), 0x18ULL), 0x68);
    file->WriteArray(SystemVersionData::DISPLAY_TITLE,
                     std::min<u64>(sizeof(SystemVersionData::DISPLAY_TITLE), 0x80ULL), 0x80);
    return std::make_shared<VectorVfsDirectory>(std::vector<VirtualFile>{file},
                                                std::vector<VirtualDir>{}, "data");
}

} // namespace FileSys::SystemArchive
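The offsets written above follow the layout of the system-version archive's data file; a descriptive sketch (field names and padding are assumptions — only the offsets and sizes come from the code):

struct SystemVersionFileLayout {         // 0x100 bytes total
    u8 version_major;                    // 0x00
    u8 version_minor;                    // 0x01
    u8 version_micro;                    // 0x02
    u8 pad0;                             // 0x03
    u8 revision_major;                   // 0x04
    u8 revision_minor;                   // 0x05
    u8 pad1[2];                          // 0x06
    char platform_string[0x20];          // 0x08
    char version_hash[0x40];             // 0x28
    char display_version[0x18];          // 0x68
    char display_title[0x80];            // 0x80
};
static_assert(sizeof(SystemVersionFileLayout) == 0x100, "must match the 0x100-byte file");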
src/core/file_sys/system_archive/system_version.h (new file, 16 lines)
@@ -0,0 +1,16 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <string>
#include "core/file_sys/vfs_types.h"

namespace FileSys::SystemArchive {

std::string GetLongDisplayVersion();

VirtualDir SystemVersion();

} // namespace FileSys::SystemArchive
@@ -29,7 +29,7 @@ namespace Kernel {
  * @param thread_handle The handle of the thread that's been awoken
  * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time
  */
-static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_late) {
+static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
     const auto proper_handle = static_cast<Handle>(thread_handle);
     const auto& system = Core::System::GetInstance();
@@ -8,9 +8,6 @@
 #include <unordered_map>
 #include "core/hle/kernel/object.h"

-template <typename T>
-class ResultVal;
-
 namespace Core {
 class System;
 }
@@ -2,7 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <map>
 #include <utility>
 #include <vector>

@@ -10,8 +9,11 @@
 #include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/object.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/result.h"
 #include "core/memory.h"
@@ -57,41 +59,47 @@ static void TransferMutexOwnership(VAddr mutex_addr, SharedPtr<Thread> current_t
     }
 }

-ResultCode Mutex::TryAcquire(HandleTable& handle_table, VAddr address, Handle holding_thread_handle,
+Mutex::Mutex(Core::System& system) : system{system} {}
+Mutex::~Mutex() = default;
+
+ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
                              Handle requesting_thread_handle) {
     // The mutex address must be 4-byte aligned
     if ((address % sizeof(u32)) != 0) {
         return ERR_INVALID_ADDRESS;
     }

+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+    Thread* const current_thread = system.CurrentScheduler().GetCurrentThread();
     SharedPtr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
     SharedPtr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle);

     // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another
     // thread.
-    ASSERT(requesting_thread == GetCurrentThread());
+    ASSERT(requesting_thread == current_thread);

-    u32 addr_value = Memory::Read32(address);
+    const u32 addr_value = Memory::Read32(address);

     // If the mutex isn't being held, just return success.
     if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
         return RESULT_SUCCESS;
     }

-    if (holding_thread == nullptr)
+    if (holding_thread == nullptr) {
         return ERR_INVALID_HANDLE;
+    }

     // Wait until the mutex is released
-    GetCurrentThread()->SetMutexWaitAddress(address);
-    GetCurrentThread()->SetWaitHandle(requesting_thread_handle);
+    current_thread->SetMutexWaitAddress(address);
+    current_thread->SetWaitHandle(requesting_thread_handle);

-    GetCurrentThread()->SetStatus(ThreadStatus::WaitMutex);
-    GetCurrentThread()->InvalidateWakeupCallback();
+    current_thread->SetStatus(ThreadStatus::WaitMutex);
+    current_thread->InvalidateWakeupCallback();

     // Update the lock holder thread's priority to prevent priority inversion.
-    holding_thread->AddMutexWaiter(GetCurrentThread());
+    holding_thread->AddMutexWaiter(current_thread);

-    Core::System::GetInstance().PrepareReschedule();
+    system.PrepareReschedule();

     return RESULT_SUCCESS;
 }
@@ -102,7 +110,8 @@ ResultCode Mutex::Release(VAddr address) {
         return ERR_INVALID_ADDRESS;
     }

-    auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(GetCurrentThread(), address);
+    auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
+    auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(current_thread, address);

     // There are no more threads waiting for the mutex, release it completely.
     if (thread == nullptr) {
@@ -111,7 +120,7 @@ ResultCode Mutex::Release(VAddr address) {
     }

     // Transfer the ownership of the mutex from the previous owner to the new one.
-    TransferMutexOwnership(address, GetCurrentThread(), thread);
+    TransferMutexOwnership(address, current_thread, thread);

     u32 mutex_value = thread->GetWaitHandle();
@@ -5,32 +5,34 @@
 #pragma once

 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"

 union ResultCode;

-namespace Kernel {
+namespace Core {
+class System;
+}

-class HandleTable;
-class Thread;
+namespace Kernel {

 class Mutex final {
 public:
+    explicit Mutex(Core::System& system);
+    ~Mutex();
+
     /// Flag that indicates that a mutex still has threads waiting for it.
     static constexpr u32 MutexHasWaitersFlag = 0x40000000;
     /// Mask of the bits in a mutex address value that contain the mutex owner.
     static constexpr u32 MutexOwnerMask = 0xBFFFFFFF;

     /// Attempts to acquire a mutex at the specified address.
-    static ResultCode TryAcquire(HandleTable& handle_table, VAddr address,
-                                 Handle holding_thread_handle, Handle requesting_thread_handle);
+    ResultCode TryAcquire(VAddr address, Handle holding_thread_handle,
+                          Handle requesting_thread_handle);

     /// Releases the mutex at the specified address.
-    static ResultCode Release(VAddr address);
+    ResultCode Release(VAddr address);

 private:
-    Mutex() = default;
-    ~Mutex() = default;
+    Core::System& system;
 };

 } // namespace Kernel
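For reference, the two constants above describe how the guest-side mutex word is encoded: the 32-bit value at the mutex address holds the owner's handle, with bit 30 flagging waiters. A small illustrative decoder using only the declared constants:

struct MutexWord {
    u32 owner_handle;
    bool has_waiters;
};

inline MutexWord DecodeMutexWord(u32 addr_value) {
    return {addr_value & Kernel::Mutex::MutexOwnerMask,              // owner handle bits
            (addr_value & Kernel::Mutex::MutexHasWaitersFlag) != 0}; // waiter flag (bit 30)
}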
@@ -23,6 +23,7 @@ bool Object::IsWaitable() const {
     case HandleType::Unknown:
     case HandleType::WritableEvent:
     case HandleType::SharedMemory:
+    case HandleType::TransferMemory:
     case HandleType::AddressArbiter:
     case HandleType::ResourceLimit:
     case HandleType::ClientPort:
@@ -22,6 +22,7 @@ enum class HandleType : u32 {
     WritableEvent,
     ReadableEvent,
     SharedMemory,
+    TransferMemory,
     Thread,
     Process,
     AddressArbiter,
@@ -229,7 +229,8 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
 }

 Process::Process(Core::System& system)
-    : WaitObject{system.Kernel()}, address_arbiter{system}, system{system} {}
+    : WaitObject{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {}

 Process::~Process() = default;

 void Process::Acquire(Thread* thread) {
@@ -13,6 +13,7 @@
 #include "common/common_types.h"
 #include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/process_capability.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/hle/kernel/wait_object.h"
@@ -34,14 +35,6 @@ class Thread;

 struct CodeSet;

-struct AddressMapping {
-    // Address and size must be page-aligned
-    VAddr address;
-    u64 size;
-    bool read_only;
-    bool unk_flag;
-};
-
 enum class MemoryRegion : u16 {
     APPLICATION = 1,
     SYSTEM = 2,
@@ -126,6 +119,16 @@ public:
         return address_arbiter;
     }

+    /// Gets a reference to the process' mutex lock.
+    Mutex& GetMutex() {
+        return mutex;
+    }
+
+    /// Gets a const reference to the process' mutex lock
+    const Mutex& GetMutex() const {
+        return mutex;
+    }
+
     /// Gets the current status of the process
     ProcessStatus GetStatus() const {
         return status;
@@ -288,6 +291,11 @@ private:
     /// Per-process address arbiter.
     AddressArbiter address_arbiter;

+    /// The per-process mutex lock instance used for handling various
+    /// forms of services, such as lock arbitration, and condition
+    /// variable related facilities.
+    Mutex mutex;
+
     /// Random values for svcGetInfo RandomEntropy
     std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy;
@@ -30,7 +30,7 @@ Scheduler::~Scheduler() {

 bool Scheduler::HaveReadyThreads() const {
     std::lock_guard<std::mutex> lock(scheduler_mutex);
-    return ready_queue.get_first() != nullptr;
+    return !ready_queue.empty();
 }

 Thread* Scheduler::GetCurrentThread() const {
@@ -46,22 +46,27 @@ Thread* Scheduler::PopNextReadyThread() {
     Thread* thread = GetCurrentThread();

     if (thread && thread->GetStatus() == ThreadStatus::Running) {
+        if (ready_queue.empty()) {
+            return thread;
+        }
         // We have to do better than the current thread.
         // This call returns null when that's not possible.
-        next = ready_queue.pop_first_better(thread->GetPriority());
-        if (!next) {
-            // Otherwise just keep going with the current thread
+        next = ready_queue.front();
+        if (next == nullptr || next->GetPriority() >= thread->GetPriority()) {
             next = thread;
         }
     } else {
-        next = ready_queue.pop_first();
+        if (ready_queue.empty()) {
+            return nullptr;
+        }
+        next = ready_queue.front();
     }

     return next;
 }

 void Scheduler::SwitchContext(Thread* new_thread) {
-    Thread* const previous_thread = GetCurrentThread();
+    Thread* previous_thread = GetCurrentThread();
     Process* const previous_process = system.Kernel().CurrentProcess();

     UpdateLastContextSwitchTime(previous_thread, previous_process);
@@ -75,7 +80,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
         if (previous_thread->GetStatus() == ThreadStatus::Running) {
             // This is only the case when a reschedule is triggered without the current thread
             // yielding execution (i.e. an event triggered, system core time-sliced, etc)
-            ready_queue.push_front(previous_thread->GetPriority(), previous_thread);
+            ready_queue.add(previous_thread, previous_thread->GetPriority(), false);
             previous_thread->SetStatus(ThreadStatus::Ready);
         }
     }
@@ -90,7 +95,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {

         current_thread = new_thread;

-        ready_queue.remove(new_thread->GetPriority(), new_thread);
+        ready_queue.remove(new_thread, new_thread->GetPriority());
         new_thread->SetStatus(ThreadStatus::Running);

         auto* const thread_owner_process = current_thread->GetOwnerProcess();
@@ -147,7 +152,6 @@ void Scheduler::AddThread(SharedPtr<Thread> thread, u32 priority) {
     std::lock_guard<std::mutex> lock(scheduler_mutex);

     thread_list.push_back(std::move(thread));
-    ready_queue.prepare(priority);
 }

 void Scheduler::RemoveThread(Thread* thread) {
@@ -161,33 +165,37 @@ void Scheduler::ScheduleThread(Thread* thread, u32 priority) {
     std::lock_guard<std::mutex> lock(scheduler_mutex);

     ASSERT(thread->GetStatus() == ThreadStatus::Ready);
-    ready_queue.push_back(priority, thread);
+    ready_queue.add(thread, priority);
 }

 void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
     std::lock_guard<std::mutex> lock(scheduler_mutex);

     ASSERT(thread->GetStatus() == ThreadStatus::Ready);
-    ready_queue.remove(priority, thread);
+    ready_queue.remove(thread, priority);
 }

 void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
     std::lock_guard<std::mutex> lock(scheduler_mutex);
+    if (thread->GetPriority() == priority) {
+        return;
+    }

     // If thread was ready, adjust queues
     if (thread->GetStatus() == ThreadStatus::Ready)
-        ready_queue.move(thread, thread->GetPriority(), priority);
-    else
-        ready_queue.prepare(priority);
+        ready_queue.adjust(thread, thread->GetPriority(), priority);
 }

 Thread* Scheduler::GetNextSuggestedThread(u32 core, u32 maximum_priority) const {
     std::lock_guard<std::mutex> lock(scheduler_mutex);

     const u32 mask = 1U << core;
-    return ready_queue.get_first_filter([mask, maximum_priority](Thread const* thread) {
-        return (thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority;
-    });
+    for (auto* thread : ready_queue) {
+        if ((thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority) {
+            return thread;
+        }
+    }
+    return nullptr;
 }

 void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
@@ -7,7 +7,7 @@
#include <mutex>
#include <vector>
#include "common/common_types.h"
-#include "common/thread_queue_list.h"
+#include "common/multi_level_queue.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/thread.h"

@@ -156,7 +156,7 @@ private:
    std::vector<SharedPtr<Thread>> thread_list;

    /// Lists only ready thread ids.
-    Common::ThreadQueueList<Thread*, THREADPRIO_LOWEST + 1> ready_queue;
+    Common::MultiLevelQueue<Thread*, THREADPRIO_LOWEST + 1> ready_queue;

    SharedPtr<Thread> current_thread = nullptr;

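The swap above from ThreadQueueList to MultiLevelQueue also changes the call shape: items come first and priority second, and pop_first/pop_first_better give way to front()/empty() plus an explicit remove(). A minimal sketch of the data structure's shape, assuming up to 64 priority levels and a deque per level; the real Common::MultiLevelQueue is more elaborate (cross-level iteration, yield support), so treat this as illustration only:

// Sketch of a multi-level ready queue; hypothetical names, not the yuzu header.
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <deque>

template <typename T, std::size_t Depth>
class MultiLevelQueueSketch {
    static_assert(Depth <= 64, "the bitmask below only covers 64 levels");

public:
    void add(const T& item, std::size_t priority, bool push_back = true) {
        auto& level = levels[priority];
        push_back ? level.push_back(item) : level.push_front(item);
        used |= std::uint64_t{1} << priority; // level is now non-empty
    }

    void remove(const T& item, std::size_t priority) {
        auto& level = levels[priority];
        level.erase(std::find(level.begin(), level.end(), item));
        if (level.empty()) {
            used &= ~(std::uint64_t{1} << priority);
        }
    }

    void adjust(const T& item, std::size_t old_priority, std::size_t new_priority) {
        remove(item, old_priority);
        add(item, new_priority);
    }

    bool empty() const {
        return used == 0;
    }

    // Precondition: !empty(); callers (like the scheduler above) check first.
    const T& front() const {
        // The lowest set bit is the numerically smallest, i.e. highest, ready
        // priority, so picking the next item is a bit scan rather than a walk.
        std::size_t level = 0;
        while (((used >> level) & 1) == 0) {
            ++level;
        }
        return levels[level].front();
    }

private:
    std::array<std::deque<T>, Depth> levels{};
    std::uint64_t used = 0; // bit i set => levels[i] has at least one entry
};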
@@ -32,6 +32,7 @@
#include "core/hle/kernel/svc.h"
#include "core/hle/kernel/svc_wrap.h"
#include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/transfer_memory.h"
#include "core/hle/kernel/writable_event.h"
#include "core/hle/lock.h"
#include "core/hle/result.h"
@@ -551,9 +552,9 @@ static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr,
        return ERR_INVALID_ADDRESS;
    }

-    auto& handle_table = Core::CurrentProcess()->GetHandleTable();
-    return Mutex::TryAcquire(handle_table, mutex_addr, holding_thread_handle,
-                             requesting_thread_handle);
+    auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
+    return current_process->GetMutex().TryAcquire(mutex_addr, holding_thread_handle,
+                                                  requesting_thread_handle);
}

/// Unlock a mutex
@@ -571,7 +572,8 @@ static ResultCode ArbitrateUnlock(VAddr mutex_addr) {
        return ERR_INVALID_ADDRESS;
    }

-    return Mutex::Release(mutex_addr);
+    auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
+    return current_process->GetMutex().Release(mutex_addr);
}

enum class BreakType : u32 {
@@ -1340,11 +1342,15 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var
        "called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}",
        mutex_addr, condition_variable_addr, thread_handle, nano_seconds);

-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
+    const auto& handle_table = current_process->GetHandleTable();
    SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
    ASSERT(thread);

-    CASCADE_CODE(Mutex::Release(mutex_addr));
+    const auto release_result = current_process->GetMutex().Release(mutex_addr);
+    if (release_result.IsError()) {
+        return release_result;
+    }

    SharedPtr<Thread> current_thread = GetCurrentThread();
    current_thread->SetCondVarWaitAddress(condition_variable_addr);
@@ -1581,14 +1587,121 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32
    }

    auto& kernel = Core::System::GetInstance().Kernel();
-    auto process = kernel.CurrentProcess();
-    auto& handle_table = process->GetHandleTable();
-    const auto shared_mem_handle = SharedMemory::Create(kernel, process, size, perms, perms, addr);
+    auto transfer_mem_handle = TransferMemory::Create(kernel, addr, size, perms);

-    CASCADE_RESULT(*handle, handle_table.Create(shared_mem_handle));
+    auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
+    const auto result = handle_table.Create(std::move(transfer_mem_handle));
+    if (result.Failed()) {
+        return result.Code();
+    }
+
+    *handle = *result;
    return RESULT_SUCCESS;
}

+static ResultCode MapTransferMemory(Handle handle, VAddr address, u64 size, u32 permission_raw) {
+    LOG_DEBUG(Kernel_SVC,
+              "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}, permissions=0x{:08X}",
+              handle, address, size, permission_raw);
+
+    if (!Common::Is4KBAligned(address)) {
+        LOG_ERROR(Kernel_SVC, "Transfer memory addresses must be 4KB aligned (size=0x{:016X}).",
+                  address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).",
+                  size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!IsValidAddressRange(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size overflows the 64-bit range (address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto permissions = static_cast<MemoryPermission>(permission_raw);
+    if (permissions != MemoryPermission::None && permissions != MemoryPermission::Read &&
+        permissions != MemoryPermission::ReadWrite) {
+        LOG_ERROR(Kernel_SVC, "Invalid transfer memory permissions given (permissions=0x{:08X}).",
+                  permission_raw);
+        return ERR_INVALID_STATE;
+    }
+
+    const auto& kernel = Core::System::GetInstance().Kernel();
+    const auto* const current_process = kernel.CurrentProcess();
+    const auto& handle_table = current_process->GetHandleTable();
+
+    auto transfer_memory = handle_table.Get<TransferMemory>(handle);
+    if (!transfer_memory) {
+        LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).",
+                  handle);
+        return ERR_INVALID_HANDLE;
+    }
+
+    if (!current_process->VMManager().IsWithinASLRRegion(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size don't fully fit within the ASLR region "
+                  "(address=0x{:016X}, size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return transfer_memory->MapMemory(address, size, permissions);
+}
+
+static ResultCode UnmapTransferMemory(Handle handle, VAddr address, u64 size) {
+    LOG_DEBUG(Kernel_SVC, "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}", handle,
+              address, size);
+
+    if (!Common::Is4KBAligned(address)) {
+        LOG_ERROR(Kernel_SVC, "Transfer memory addresses must be 4KB aligned (size=0x{:016X}).",
+                  address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).",
+                  size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!IsValidAddressRange(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size overflows the 64-bit range (address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto& kernel = Core::System::GetInstance().Kernel();
+    const auto* const current_process = kernel.CurrentProcess();
+    const auto& handle_table = current_process->GetHandleTable();
+
+    auto transfer_memory = handle_table.Get<TransferMemory>(handle);
+    if (!transfer_memory) {
+        LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).",
+                  handle);
+        return ERR_INVALID_HANDLE;
+    }
+
+    if (!current_process->VMManager().IsWithinASLRRegion(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size don't fully fit within the ASLR region "
+                  "(address=0x{:016X}, size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return transfer_memory->UnmapMemory(address, size);
+}
+
static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) {
    LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);

@@ -1964,8 +2077,8 @@ static const FunctionDef SVC_Table[] = {
    {0x4E, nullptr, "ReadWriteRegister"},
    {0x4F, nullptr, "SetProcessActivity"},
    {0x50, SvcWrap<CreateSharedMemory>, "CreateSharedMemory"},
-    {0x51, nullptr, "MapTransferMemory"},
-    {0x52, nullptr, "UnmapTransferMemory"},
+    {0x51, SvcWrap<MapTransferMemory>, "MapTransferMemory"},
+    {0x52, SvcWrap<UnmapTransferMemory>, "UnmapTransferMemory"},
    {0x53, nullptr, "CreateInterruptEvent"},
    {0x54, nullptr, "QueryPhysicalAddress"},
    {0x55, nullptr, "QueryIoMapping"},

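Both new SVCs above run the same alignment and range validation before touching the handle table. That shared prefix is small enough to state exactly; a self-contained sketch of just those checks, with illustrative helper names (the in-tree Common::Is4KBAligned and IsValidAddressRange are assumed to behave this way based on the call sites shown):

// Sketch of the common transfer-memory parameter checks; hypothetical helpers.
#include <cstdint>

using VAddr = std::uint64_t;

constexpr bool Is4KBAlignedSketch(std::uint64_t value) {
    return (value & 0xFFF) == 0; // low 12 bits clear => multiple of 0x1000
}

constexpr bool IsValidAddressRangeSketch(VAddr address, std::uint64_t size) {
    return address + size > address; // rejects 64-bit wraparound
}

constexpr bool PassesCommonTransferMemoryChecks(VAddr address, std::uint64_t size) {
    return Is4KBAlignedSketch(address) && size != 0 && Is4KBAlignedSketch(size) &&
           IsValidAddressRangeSketch(address, size);
}

static_assert(PassesCommonTransferMemoryChecks(0x1000, 0x2000));
static_assert(!PassesCommonTransferMemoryChecks(0x1001, 0x2000)); // unaligned base
static_assert(!PassesCommonTransferMemoryChecks(0x1000, 0));      // zero size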
src/core/hle/kernel/transfer_memory.cpp (new file, 73 lines)
@@ -0,0 +1,73 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/shared_memory.h"
+#include "core/hle/kernel/transfer_memory.h"
+#include "core/hle/result.h"
+
+namespace Kernel {
+
+TransferMemory::TransferMemory(KernelCore& kernel) : Object{kernel} {}
+TransferMemory::~TransferMemory() = default;
+
+SharedPtr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr base_address,
+                                                 size_t size, MemoryPermission permissions) {
+    SharedPtr<TransferMemory> transfer_memory{new TransferMemory(kernel)};
+
+    transfer_memory->base_address = base_address;
+    transfer_memory->memory_size = size;
+    transfer_memory->owner_permissions = permissions;
+    transfer_memory->owner_process = kernel.CurrentProcess();
+
+    return transfer_memory;
+}
+
+ResultCode TransferMemory::MapMemory(VAddr address, size_t size, MemoryPermission permissions) {
+    if (memory_size != size) {
+        return ERR_INVALID_SIZE;
+    }
+
+    if (owner_permissions != permissions) {
+        return ERR_INVALID_STATE;
+    }
+
+    if (is_mapped) {
+        return ERR_INVALID_STATE;
+    }
+
+    const auto map_state = owner_permissions == MemoryPermission::None
+                               ? MemoryState::TransferMemoryIsolated
+                               : MemoryState::TransferMemory;
+    auto& vm_manager = owner_process->VMManager();
+    const auto map_result = vm_manager.MapMemoryBlock(
+        address, std::make_shared<std::vector<u8>>(size), 0, size, map_state);
+
+    if (map_result.Failed()) {
+        return map_result.Code();
+    }
+
+    is_mapped = true;
+    return RESULT_SUCCESS;
+}
+
+ResultCode TransferMemory::UnmapMemory(VAddr address, size_t size) {
+    if (memory_size != size) {
+        return ERR_INVALID_SIZE;
+    }
+
+    auto& vm_manager = owner_process->VMManager();
+    const auto result = vm_manager.UnmapRange(address, size);
+
+    if (result.IsError()) {
+        return result;
+    }
+
+    is_mapped = false;
+    return RESULT_SUCCESS;
+}
+
+} // namespace Kernel

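Reading MapMemory's checks in order, the state machine a caller sees can be summarized directly from the code above; `tmem`, `sz`, `perm`, and `other_perm` below are stand-ins for a created instance and its creation parameters:

// Expected results, given: auto tmem = TransferMemory::Create(kernel, addr, sz, perm);
//
//   tmem->MapMemory(addr, sz / 2, perm);   // -> ERR_INVALID_SIZE   (size mismatch)
//   tmem->MapMemory(addr, sz, other_perm); // -> ERR_INVALID_STATE  (permission mismatch)
//   tmem->MapMemory(addr, sz, perm);       // -> RESULT_SUCCESS     (is_mapped becomes true)
//   tmem->MapMemory(addr, sz, perm);       // -> ERR_INVALID_STATE  (already mapped)
//   tmem->UnmapMemory(addr, sz);           // -> RESULT_SUCCESS     (is_mapped becomes false)
//
// Note the permission value also selects the memory state used for the mapping:
// MemoryPermission::None picks MemoryState::TransferMemoryIsolated, anything else
// picks MemoryState::TransferMemory.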
src/core/hle/kernel/transfer_memory.h (new file, 91 lines)
@@ -0,0 +1,91 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/kernel/object.h"
+
+union ResultCode;
+
+namespace Kernel {
+
+class KernelCore;
+class Process;
+
+enum class MemoryPermission : u32;
+
+/// Defines the interface for transfer memory objects.
+///
+/// Transfer memory is typically used for the purpose of
+/// transferring memory between separate process instances,
+/// thus the name.
+///
+class TransferMemory final : public Object {
+public:
+    static constexpr HandleType HANDLE_TYPE = HandleType::TransferMemory;
+
+    static SharedPtr<TransferMemory> Create(KernelCore& kernel, VAddr base_address, size_t size,
+                                            MemoryPermission permissions);
+
+    TransferMemory(const TransferMemory&) = delete;
+    TransferMemory& operator=(const TransferMemory&) = delete;
+
+    TransferMemory(TransferMemory&&) = delete;
+    TransferMemory& operator=(TransferMemory&&) = delete;
+
+    std::string GetTypeName() const override {
+        return "TransferMemory";
+    }
+
+    std::string GetName() const override {
+        return GetTypeName();
+    }
+
+    HandleType GetHandleType() const override {
+        return HANDLE_TYPE;
+    }
+
+    /// Attempts to map transfer memory with the given range and memory permissions.
+    ///
+    /// @param address     The base address to begin mapping memory at.
+    /// @param size        The size of the memory to map, in bytes.
+    /// @param permissions The memory permissions to check against when mapping memory.
+    ///
+    /// @pre The given address, size, and memory permissions must all match
+    ///      the same values that were given when creating the transfer memory
+    ///      instance.
+    ///
+    ResultCode MapMemory(VAddr address, size_t size, MemoryPermission permissions);
+
+    /// Unmaps the transfer memory with the given range
+    ///
+    /// @param address The base address to begin unmapping memory at.
+    /// @param size    The size of the memory to unmap, in bytes.
+    ///
+    /// @pre The given address and size must be the same as the ones used
+    ///      to create the transfer memory instance.
+    ///
+    ResultCode UnmapMemory(VAddr address, size_t size);
+
+private:
+    explicit TransferMemory(KernelCore& kernel);
+    ~TransferMemory() override;
+
+    /// The base address for the memory managed by this instance.
+    VAddr base_address = 0;
+
+    /// Size of the memory, in bytes, that this instance manages.
+    size_t memory_size = 0;
+
+    /// The memory permissions that are applied to this instance.
+    MemoryPermission owner_permissions{};
+
+    /// The process that this transfer memory instance was created under.
+    Process* owner_process = nullptr;
+
+    /// Whether or not this transfer memory instance has mapped memory.
+    bool is_mapped = false;
+};
+
+} // namespace Kernel

@@ -36,9 +36,9 @@ namespace Service::HID {

// Updating period for each HID device.
// TODO(ogniK): Find actual polling rate of hid
-constexpr u64 pad_update_ticks = Core::Timing::BASE_CLOCK_RATE / 66;
-constexpr u64 accelerometer_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
-constexpr u64 gyroscope_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
+constexpr s64 pad_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 66);
+constexpr s64 accelerometer_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
+constexpr s64 gyroscope_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;

IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
@@ -75,7 +75,7 @@ IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
    // Register update callbacks
    auto& core_timing = Core::System::GetInstance().CoreTiming();
    pad_update_event =
-        core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) {
+        core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) {
            UpdateControllers(userdata, cycles_late);
        });

@@ -106,7 +106,7 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
    rb.PushCopyObjects(shared_mem);
}

-void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) {
+void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) {
    auto& core_timing = Core::System::GetInstance().CoreTiming();

    const bool should_reload = Settings::values.is_device_reload_pending.exchange(false);

@@ -65,7 +65,7 @@ private:
    }

    void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx);
-    void UpdateControllers(u64 userdata, int cycles_late);
+    void UpdateControllers(u64 userdata, s64 cycles_late);

    Kernel::SharedPtr<Kernel::SharedMemory> shared_mem;

@@ -89,7 +89,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
    for (const auto& entry : entries) {
        LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
                    entry.offset, entry.nvmap_handle, entry.pages);
-        Tegra::GPUVAddr offset = static_cast<Tegra::GPUVAddr>(entry.offset) << 0x10;
+        GPUVAddr offset = static_cast<GPUVAddr>(entry.offset) << 0x10;
        auto object = nvmap_dev->GetObject(entry.nvmap_handle);
        if (!object) {
            LOG_CRITICAL(Service_NVDRV, "nvmap {} is an invalid handle!", entry.nvmap_handle);
@@ -102,7 +102,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
        u64 size = static_cast<u64>(entry.pages) << 0x10;
        ASSERT(size <= object->size);

-        Tegra::GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
+        GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
        ASSERT(returned == offset);
    }
    std::memcpy(output.data(), entries.data(), output.size());
@@ -173,16 +173,8 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
        return 0;
    }

-    auto& system_instance = Core::System::GetInstance();
-
-    // Remove this memory region from the rasterizer cache.
-    auto& gpu = system_instance.GPU();
-    auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
-    ASSERT(cpu_addr);
-    gpu.FlushAndInvalidateRegion(ToCacheAddr(Memory::GetPointer(*cpu_addr)), itr->second.size);
-
-    params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
-
+    params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset,
+                                                                                  itr->second.size);
    buffer_mappings.erase(itr->second.offset);

    std::memcpy(output.data(), &params, output.size());

@@ -26,7 +26,7 @@
namespace Service::NVFlinger {

constexpr std::size_t SCREEN_REFRESH_RATE = 60;
-constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
+constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);

NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
    displays.emplace_back(0, "Default");
@@ -37,7 +37,7 @@ NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_t

    // Schedule the screen composition events
    composition_event =
-        core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
+        core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) {
            Compose();
            this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event);
        });

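The u64/int to s64 changes in the HID and NVFlinger callbacks above matter because of expressions like `frame_ticks - cycles_late`: with unsigned tick types, a callback that fires more than one full period late would wrap that subtraction around to an enormous positive delay. A minimal, self-contained illustration (the tick rate used here is an arbitrary example, not the emulator's actual clock):

#include <cstdint>
#include <iostream>

int main() {
    const std::int64_t frame_ticks = 19'200'000 / 60;     // example ticks per frame
    const std::int64_t cycles_late = frame_ticks + 5'000; // callback ran very late

    // Signed arithmetic yields a small negative value the scheduler can clamp.
    const std::int64_t signed_delay = frame_ticks - cycles_late;

    // The same subtraction done on unsigned values wraps around.
    const std::uint64_t unsigned_delay =
        static_cast<std::uint64_t>(frame_ticks) - static_cast<std::uint64_t>(cycles_late);

    std::cout << signed_delay << '\n';   // -5000
    std::cout << unsigned_delay << '\n'; // 18446744073709546616: effectively "never"
}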
@@ -2,13 +2,88 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

+#include "common/assert.h"
#include "common/logging/log.h"
+#include "core/file_sys/errors.h"
+#include "core/file_sys/system_archive/system_version.h"
#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/client_port.h"
+#include "core/hle/service/filesystem/filesystem.h"
#include "core/hle/service/set/set_sys.h"

namespace Service::Set {

+namespace {
+constexpr u64 SYSTEM_VERSION_FILE_MINOR_REVISION_OFFSET = 0x05;
+
+enum class GetFirmwareVersionType {
+    Version1,
+    Version2,
+};
+
+void GetFirmwareVersionImpl(Kernel::HLERequestContext& ctx, GetFirmwareVersionType type) {
+    LOG_WARNING(Service_SET, "called - Using hardcoded firmware version '{}'",
+                FileSys::SystemArchive::GetLongDisplayVersion());
+
+    ASSERT_MSG(ctx.GetWriteBufferSize() == 0x100,
+               "FirmwareVersion output buffer must be 0x100 bytes in size!");
+
+    // Instead of using the normal procedure of checking for the real system archive and, if it
+    // doesn't exist, synthesizing one, I feel that that would lead to strange bugs when a
+    // user is using a really old or really new SystemVersion title. The synthesized one ensures
+    // consistency (currently reports as 5.1.0-0.0)
+    const auto archive = FileSys::SystemArchive::SystemVersion();
+
+    const auto early_exit_failure = [&ctx](const std::string& desc, ResultCode code) {
+        LOG_ERROR(Service_SET, "General failure while attempting to resolve firmware version ({}).",
+                  desc.c_str());
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(code);
+    };
+
+    if (archive == nullptr) {
+        early_exit_failure("The system version archive couldn't be synthesized.",
+                           FileSys::ERROR_FAILED_MOUNT_ARCHIVE);
+        return;
+    }
+
+    const auto ver_file = archive->GetFile("file");
+    if (ver_file == nullptr) {
+        early_exit_failure("The system version archive didn't contain the file 'file'.",
+                           FileSys::ERROR_INVALID_ARGUMENT);
+        return;
+    }
+
+    auto data = ver_file->ReadAllBytes();
+    if (data.size() != 0x100) {
+        early_exit_failure("The system version file 'file' was not the correct size.",
+                           FileSys::ERROR_OUT_OF_BOUNDS);
+        return;
+    }
+
+    // If the command is GetFirmwareVersion (as opposed to GetFirmwareVersion2), hardware will
+    // zero out the REVISION_MINOR field.
+    if (type == GetFirmwareVersionType::Version1) {
+        data[SYSTEM_VERSION_FILE_MINOR_REVISION_OFFSET] = 0;
+    }
+
+    ctx.WriteBuffer(data);
+
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+}
+} // Anonymous namespace
+
+void SET_SYS::GetFirmwareVersion(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_SET, "called");
+    GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version1);
+}
+
+void SET_SYS::GetFirmwareVersion2(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_SET, "called");
+    GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version2);
+}
+
void SET_SYS::GetColorSetId(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_SET, "called");

@@ -33,8 +108,8 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
        {0, nullptr, "SetLanguageCode"},
        {1, nullptr, "SetNetworkSettings"},
        {2, nullptr, "GetNetworkSettings"},
-        {3, nullptr, "GetFirmwareVersion"},
-        {4, nullptr, "GetFirmwareVersion2"},
+        {3, &SET_SYS::GetFirmwareVersion, "GetFirmwareVersion"},
+        {4, &SET_SYS::GetFirmwareVersion2, "GetFirmwareVersion2"},
        {5, nullptr, "GetFirmwareVersionDigest"},
        {7, nullptr, "GetLockScreenFlag"},
        {8, nullptr, "SetLockScreenFlag"},

@@ -20,6 +20,8 @@ private:
        BasicBlack = 1,
    };

+    void GetFirmwareVersion(Kernel::HLERequestContext& ctx);
+    void GetFirmwareVersion2(Kernel::HLERequestContext& ctx);
    void GetColorSetId(Kernel::HLERequestContext& ctx);
    void SetColorSetId(Kernel::HLERequestContext& ctx);

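The implementation above treats the system version blob as an opaque 0x100-byte buffer, patching one byte. A hedged sketch of that buffer's layout: only the 0x100 total size and the minor-revision byte at offset 0x05 are taken from the code above; the surrounding field names are assumptions for illustration.

#include <array>
#include <cstdint>

// Hypothetical view of the 0x100-byte system version file.
struct SystemVersionFileSketch {
    std::uint8_t major;                  // 0x00
    std::uint8_t minor;                  // 0x01
    std::uint8_t micro;                  // 0x02
    std::uint8_t padding;                // 0x03
    std::uint8_t revision_major;         // 0x04
    std::uint8_t revision_minor;         // 0x05 <- zeroed by GetFirmwareVersion
    std::array<std::uint8_t, 0xFA> rest; // 0x06..0xFF: display strings, padding
};
static_assert(sizeof(SystemVersionFileSketch) == 0x100);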
@@ -21,36 +21,8 @@
#include "core/settings.h"

namespace Loader {

-struct NsoSegmentHeader {
-    u32_le offset;
-    u32_le location;
-    u32_le size;
-    union {
-        u32_le alignment;
-        u32_le bss_size;
-    };
-};
-static_assert(sizeof(NsoSegmentHeader) == 0x10, "NsoSegmentHeader has incorrect size.");
-
-struct NsoHeader {
-    u32_le magic;
-    u32_le version;
-    INSERT_PADDING_WORDS(1);
-    u8 flags;
-    std::array<NsoSegmentHeader, 3> segments; // Text, RoData, Data (in that order)
-    std::array<u8, 0x20> build_id;
-    std::array<u32_le, 3> segments_compressed_size;
-
-    bool IsSegmentCompressed(size_t segment_num) const {
-        ASSERT_MSG(segment_num < 3, "Invalid segment {}", segment_num);
-        return ((flags >> segment_num) & 1);
-    }
-};
-static_assert(sizeof(NsoHeader) == 0x6c, "NsoHeader has incorrect size.");
-static_assert(std::is_trivially_copyable_v<NsoHeader>, "NsoHeader isn't trivially copyable.");
-
-struct ModHeader {
+namespace {
+struct MODHeader {
    u32_le magic;
    u32_le dynamic_offset;
    u32_le bss_start_offset;
@@ -59,7 +31,32 @@ struct ModHeader {
    u32_le eh_frame_hdr_end_offset;
    u32_le module_offset; // Offset to runtime-generated module object. typically equal to .bss base
};
-static_assert(sizeof(ModHeader) == 0x1c, "ModHeader has incorrect size.");
+static_assert(sizeof(MODHeader) == 0x1c, "MODHeader has incorrect size.");
+
+std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
+                                  const NSOSegmentHeader& header) {
+    std::vector<u8> uncompressed_data(header.size);
+    const int bytes_uncompressed =
+        LZ4_decompress_safe(reinterpret_cast<const char*>(compressed_data.data()),
+                            reinterpret_cast<char*>(uncompressed_data.data()),
+                            static_cast<int>(compressed_data.size()), header.size);
+
+    ASSERT_MSG(bytes_uncompressed == static_cast<int>(header.size) &&
+                   bytes_uncompressed == static_cast<int>(uncompressed_data.size()),
+               "{} != {} != {}", bytes_uncompressed, header.size, uncompressed_data.size());
+
+    return uncompressed_data;
+}
+
+constexpr u32 PageAlignSize(u32 size) {
+    return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
+}
+} // Anonymous namespace
+
+bool NSOHeader::IsSegmentCompressed(size_t segment_num) const {
+    ASSERT_MSG(segment_num < 3, "Invalid segment {}", segment_num);
+    return ((flags >> segment_num) & 1) != 0;
+}

AppLoader_NSO::AppLoader_NSO(FileSys::VirtualFile file) : AppLoader(std::move(file)) {}

@@ -76,38 +73,22 @@ FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& file) {
    return FileType::NSO;
}

-static std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
-                                         const NsoSegmentHeader& header) {
-    std::vector<u8> uncompressed_data(header.size);
-    const int bytes_uncompressed =
-        LZ4_decompress_safe(reinterpret_cast<const char*>(compressed_data.data()),
-                            reinterpret_cast<char*>(uncompressed_data.data()),
-                            static_cast<int>(compressed_data.size()), header.size);
-
-    ASSERT_MSG(bytes_uncompressed == static_cast<int>(header.size) &&
-                   bytes_uncompressed == static_cast<int>(uncompressed_data.size()),
-               "{} != {} != {}", bytes_uncompressed, header.size, uncompressed_data.size());
-
-    return uncompressed_data;
-}
-
-static constexpr u32 PageAlignSize(u32 size) {
-    return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
-}
-
std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
                                               const FileSys::VfsFile& file, VAddr load_base,
                                               bool should_pass_arguments,
                                               std::optional<FileSys::PatchManager> pm) {
-    if (file.GetSize() < sizeof(NsoHeader))
+    if (file.GetSize() < sizeof(NSOHeader)) {
        return {};
+    }

-    NsoHeader nso_header{};
-    if (sizeof(NsoHeader) != file.ReadObject(&nso_header))
+    NSOHeader nso_header{};
+    if (sizeof(NSOHeader) != file.ReadObject(&nso_header)) {
        return {};
+    }

-    if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0'))
+    if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0')) {
        return {};
+    }

    // Build program image
    Kernel::CodeSet codeset;
@@ -143,10 +124,10 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
    std::memcpy(&module_offset, program_image.data() + 4, sizeof(u32));

    // Read MOD header
-    ModHeader mod_header{};
+    MODHeader mod_header{};
    // Default .bss to size in segment header if MOD0 section doesn't exist
    u32 bss_size{PageAlignSize(nso_header.segments[2].bss_size)};
-    std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(ModHeader));
+    std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(MODHeader));
    const bool has_mod_header{mod_header.magic == Common::MakeMagic('M', 'O', 'D', '0')};
    if (has_mod_header) {
        // Resize program image to include .bss section and page align each section
@@ -158,22 +139,24 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,

    // Apply patches if necessary
    if (pm && (pm->HasNSOPatch(nso_header.build_id) || Settings::values.dump_nso)) {
-        std::vector<u8> pi_header(program_image.size() + 0x100);
-        std::memcpy(pi_header.data(), &nso_header, sizeof(NsoHeader));
-        std::memcpy(pi_header.data() + 0x100, program_image.data(), program_image.size());
+        std::vector<u8> pi_header(sizeof(NSOHeader) + program_image.size());
+        pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header),
+                         reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader));
+        pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(),
+                         program_image.end());

        pi_header = pm->PatchNSO(pi_header);

-        std::memcpy(program_image.data(), pi_header.data() + 0x100, program_image.size());
+        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
    }

    // Apply cheats if they exist and the program has a valid title ID
    if (pm) {
-        const auto cheats = pm->CreateCheatList(nso_header.build_id);
+        auto& system = Core::System::GetInstance();
+        const auto cheats = pm->CreateCheatList(system, nso_header.build_id);
        if (!cheats.empty()) {
-            Core::System::GetInstance().RegisterCheatList(
-                cheats, Common::HexArrayToString(nso_header.build_id), load_base,
-                load_base + program_image.size());
+            system.RegisterCheatList(cheats, Common::HexArrayToString(nso_header.build_id),
+                                     load_base, load_base + program_image.size());
        }
    }

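PageAlignSize above is the usual mask-based round-up to the 4 KiB page size. A quick check of the arithmetic, assuming Memory::PAGE_MASK is 0xFFF:

// Round a size up to the next multiple of 0x1000 (4 KiB), given a 0xFFF mask.
constexpr unsigned PageAlignSizeSketch(unsigned size) {
    return (size + 0xFFFu) & ~0xFFFu;
}
static_assert(PageAlignSizeSketch(0x1000) == 0x1000); // already aligned: unchanged
static_assert(PageAlignSizeSketch(0x1001) == 0x2000); // one byte over: next page
static_assert(PageAlignSizeSketch(0x0000) == 0x0000); // zero stays zero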
@@ -4,7 +4,9 @@

#pragma once

+#include <array>
#include <optional>
+#include <type_traits>
#include "common/common_types.h"
#include "common/swap.h"
#include "core/file_sys/patch_manager.h"
@@ -16,6 +18,43 @@ class Process;

namespace Loader {

+struct NSOSegmentHeader {
+    u32_le offset;
+    u32_le location;
+    u32_le size;
+    union {
+        u32_le alignment;
+        u32_le bss_size;
+    };
+};
+static_assert(sizeof(NSOSegmentHeader) == 0x10, "NSOSegmentHeader has incorrect size.");
+
+struct NSOHeader {
+    using SHA256Hash = std::array<u8, 0x20>;
+
+    struct RODataRelativeExtent {
+        u32_le data_offset;
+        u32_le size;
+    };
+
+    u32_le magic;
+    u32_le version;
+    u32 reserved;
+    u32_le flags;
+    std::array<NSOSegmentHeader, 3> segments; // Text, RoData, Data (in that order)
+    std::array<u8, 0x20> build_id;
+    std::array<u32_le, 3> segments_compressed_size;
+    std::array<u8, 0x1C> padding;
+    RODataRelativeExtent api_info_extent;
+    RODataRelativeExtent dynstr_extent;
+    RODataRelativeExtent dynsyn_extent;
+    std::array<SHA256Hash, 3> segment_hashes;
+
+    bool IsSegmentCompressed(size_t segment_num) const;
+};
+static_assert(sizeof(NSOHeader) == 0x100, "NSOHeader has incorrect size.");
+static_assert(std::is_trivially_copyable_v<NSOHeader>, "NSOHeader must be trivially copyable.");
+
+constexpr u64 NSO_ARGUMENT_DATA_ALLOCATION_SIZE = 0x9000;
+
struct NSOArgumentHeader {

@@ -48,7 +48,7 @@ static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* me
              (base + size) * PAGE_SIZE);

    // During boot, current_page_table might not be set yet, in which case we need not flush
-    if (current_page_table) {
+    if (Core::System::GetInstance().IsPoweredOn()) {
        Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
                                                                   size * PAGE_SIZE);
    }

@@ -6,9 +6,6 @@

#include <cstddef>
#include <string>
-#include <tuple>
-#include <vector>
-#include <boost/icl/interval_map.hpp>
#include "common/common_types.h"

namespace Common {

@@ -1,5 +1,7 @@
add_executable(tests
    common/bit_field.cpp
+    common/bit_utils.cpp
+    common/multi_level_queue.cpp
    common/param_package.cpp
    common/ring_buffer.cpp
    core/arm/arm_test_common.cpp

src/tests/common/bit_utils.cpp (new file, 23 lines)
@@ -0,0 +1,23 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <catch2/catch.hpp>
+#include <math.h>
+#include "common/bit_util.h"
+
+namespace Common {
+
+TEST_CASE("BitUtils::CountTrailingZeroes", "[common]") {
+    REQUIRE(Common::CountTrailingZeroes32(0) == 32);
+    REQUIRE(Common::CountTrailingZeroes64(0) == 64);
+    REQUIRE(Common::CountTrailingZeroes32(9) == 0);
+    REQUIRE(Common::CountTrailingZeroes32(8) == 3);
+    REQUIRE(Common::CountTrailingZeroes32(0x801000) == 12);
+    REQUIRE(Common::CountTrailingZeroes64(9) == 0);
+    REQUIRE(Common::CountTrailingZeroes64(8) == 3);
+    REQUIRE(Common::CountTrailingZeroes64(0x801000) == 12);
+    REQUIRE(Common::CountTrailingZeroes64(0x801000000000UL) == 36);
+}
+
+} // namespace Common
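A hedged sketch of a trailing-zero count that satisfies the test expectations above; the in-tree Common::CountTrailingZeroes functions may well use compiler intrinsics such as __builtin_ctz instead, so this is just the portable reference version:

#include <cstdint>

constexpr std::uint32_t CountTrailingZeroes32Sketch(std::uint32_t value) {
    if (value == 0) {
        return 32; // convention used by the tests: all 32 bits are "trailing"
    }
    std::uint32_t count = 0;
    while ((value & 1) == 0) { // shift right until the lowest set bit arrives
        value >>= 1;
        ++count;
    }
    return count;
}

static_assert(CountTrailingZeroes32Sketch(0) == 32);
static_assert(CountTrailingZeroes32Sketch(8) == 3);
static_assert(CountTrailingZeroes32Sketch(0x801000) == 12);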
src/tests/common/multi_level_queue.cpp (new file, 55 lines)
@@ -0,0 +1,55 @@
+// Copyright 2019 Yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <catch2/catch.hpp>
+#include <math.h>
+#include "common/common_types.h"
+#include "common/multi_level_queue.h"
+
+namespace Common {
+
+TEST_CASE("MultiLevelQueue", "[common]") {
+    std::array<f32, 8> values = {0.0, 5.0, 1.0, 9.0, 8.0, 2.0, 6.0, 7.0};
+    Common::MultiLevelQueue<f32, 64> mlq;
+    REQUIRE(mlq.empty());
+    mlq.add(values[2], 2);
+    mlq.add(values[7], 7);
+    mlq.add(values[3], 3);
+    mlq.add(values[4], 4);
+    mlq.add(values[0], 0);
+    mlq.add(values[5], 5);
+    mlq.add(values[6], 6);
+    mlq.add(values[1], 1);
+    u32 index = 0;
+    bool all_set = true;
+    for (auto& f : mlq) {
+        all_set &= (f == values[index]);
+        index++;
+    }
+    REQUIRE(all_set);
+    REQUIRE(!mlq.empty());
+    f32 v = 8.0;
+    mlq.add(v, 2);
+    v = -7.0;
+    mlq.add(v, 2, false);
+    REQUIRE(mlq.front(2) == -7.0);
+    mlq.yield(2);
+    REQUIRE(mlq.front(2) == values[2]);
+    REQUIRE(mlq.back(2) == -7.0);
+    REQUIRE(mlq.empty(8));
+    v = 10.0;
+    mlq.add(v, 8);
+    mlq.adjust(v, 8, 9);
+    REQUIRE(mlq.front(9) == v);
+    REQUIRE(mlq.empty(8));
+    REQUIRE(!mlq.empty(9));
+    mlq.adjust(values[0], 0, 9);
+    REQUIRE(mlq.highest_priority_set() == 1);
+    REQUIRE(mlq.lowest_priority_set() == 9);
+    mlq.remove(values[1], 1);
+    REQUIRE(mlq.highest_priority_set() == 2);
+    REQUIRE(mlq.empty(1));
+}
+
+} // namespace Common
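The yield(2) assertions above pin down the rotation semantics: after adding -7.0 at the front of level 2, yield makes values[2] the new front and moves -7.0 to the back. In other words, yield rotates the front item of its own priority level to the back, which is exactly the round-robin behavior a scheduler wants for same-priority threads. Spelled out against a deque-based model of one level (illustrative, not the real container):

#include <deque>

template <typename T>
void YieldSketch(std::deque<T>& level) {
    // Rotate the current front to the back of its own priority level so the
    // next same-priority entry gets a turn.
    if (!level.empty()) {
        level.push_back(level.front());
        level.pop_front();
    }
}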
@@ -9,7 +9,6 @@

#include "common/bit_field.h"
#include "common/common_types.h"
-#include "video_core/memory_manager.h"

namespace Tegra {

@@ -46,7 +46,7 @@ void KeplerMemory::ProcessData(u32 data) {
    // contain a dirty surface that will have to be written back to memory.
    const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
    rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
-    memory_manager.Write32(address, data);
+    memory_manager.Write<u32>(address, data);

    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();

@@ -307,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() {
        // Write the current query sequence to the sequence address.
        // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
        // query.
-        memory_manager.Write32(sequence_address, sequence);
+        memory_manager.Write<u32>(sequence_address, sequence);
    } else {
        // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
        // GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -395,7 +395,7 @@ void Maxwell3D::ProcessCBData(u32 value) {

    u8* ptr{memory_manager.GetPointer(address)};
    rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
-    memory_manager.Write32(address, value);
+    memory_manager.Write<u32>(address, value);

    dirty_flags.OnMemoryWrite();

@@ -447,7 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
    for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
         current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {

-        const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)};
+        const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)};

        Texture::FullTextureInfo tex_info{};
        // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -482,7 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,

    ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);

-    const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)};
+    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};

    Texture::FullTextureInfo tex_info{};
    tex_info.index = static_cast<u32>(offset);

@@ -88,6 +88,16 @@ void MaxwellDMA::HandleCopy() {
    auto source_ptr{memory_manager.GetPointer(source)};
    auto dst_ptr{memory_manager.GetPointer(dest)};

+    if (!source_ptr) {
+        LOG_ERROR(HW_GPU, "source_ptr is invalid");
+        return;
+    }
+
+    if (!dst_ptr) {
+        LOG_ERROR(HW_GPU, "dst_ptr is invalid");
+        return;
+    }
+
    const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
        // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
        // copying.

@@ -12,6 +12,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"

namespace Tegra {
@@ -287,7 +288,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
        block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
        memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block));
    } else {
-        const u32 word{memory_manager->Read32(regs.smaphore_address.SmaphoreAddress())};
+        const u32 word{memory_manager->Read<u32>(regs.smaphore_address.SmaphoreAddress())};
        if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
            (op == GpuSemaphoreOperation::AcquireGequal &&
             static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
@@ -314,11 +315,11 @@ void GPU::ProcessSemaphoreTriggerMethod() {
}

void GPU::ProcessSemaphoreRelease() {
-    memory_manager->Write32(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release);
+    memory_manager->Write<u32>(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release);
}

void GPU::ProcessSemaphoreAcquire() {
-    const u32 word = memory_manager->Read32(regs.smaphore_address.SmaphoreAddress());
+    const u32 word = memory_manager->Read<u32>(regs.smaphore_address.SmaphoreAddress());
    const auto value = regs.semaphore_acquire;
    if (word != value) {
        regs.acquire_active = true;

@@ -9,7 +9,6 @@
#include "common/common_types.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/dma_pusher.h"
-#include "video_core/memory_manager.h"

using CacheAddr = std::uintptr_t;
inline CacheAddr ToCacheAddr(const void* host_ptr) {
@@ -124,6 +123,8 @@ enum class EngineID {
    MAXWELL_DMA_COPY_A = 0xB0B5,
};

+class MemoryManager;
+
class GPU {
public:
    explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
@@ -244,9 +245,8 @@ protected:
private:
    std::unique_ptr<Tegra::MemoryManager> memory_manager;

-    /// Mapping of command subchannels to their bound engine ids.
+    /// Mapping of command subchannels to their bound engine ids
    std::array<EngineID, 8> bound_engines = {};

    /// 3D engine
    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
    /// 2D engine

@@ -5,198 +5,187 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"

namespace Tegra {

MemoryManager::MemoryManager() {
-    // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might
-    // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with
-    // Undertale using 0 for a render target.
-    PageSlot(0) = static_cast<u64>(PageStatus::Reserved);
+    std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
+    std::fill(page_table.attributes.begin(), page_table.attributes.end(),
+              Common::PageType::Unmapped);
+    page_table.Resize(address_space_width);
+
+    // Initialize the map with a single free region covering the entire managed space.
+    VirtualMemoryArea initial_vma;
+    initial_vma.size = address_space_end;
+    vma_map.emplace(initial_vma.base, initial_vma);
+
+    UpdatePageTableForVMA(initial_vma);
}

GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
-    const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
-
-    ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
-
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(*gpu_addr + offset)};
-
-        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
-
-        slot = static_cast<u64>(PageStatus::Allocated);
-    }
-
-    return *gpu_addr;
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
+
+    AllocateMemory(gpu_addr, 0, aligned_size);
+
+    return gpu_addr;
}

GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(gpu_addr + offset)};
-
-        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
-
-        slot = static_cast<u64>(PageStatus::Allocated);
-    }
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+
+    AllocateMemory(gpu_addr, 0, aligned_size);

    return gpu_addr;
}

GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
-    const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)};
-
-    ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
-
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(*gpu_addr + offset)};
-
-        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
-
-        slot = cpu_addr + offset;
-    }
-
-    const MappedRegion region{cpu_addr, *gpu_addr, size};
-    mapped_regions.push_back(region);
-
-    return *gpu_addr;
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
+
+    MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
+
+    return gpu_addr;
}

GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
-    ASSERT((gpu_addr & PAGE_MASK) == 0);
-
-    if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) {
-        // Page has been already mapped. In this case, we must find a new area of memory to use that
-        // is different than the specified one. Super Mario Odyssey hits this scenario when changing
-        // areas, but we do not want to overwrite the old pages.
-        // TODO(bunnei): We need to write a hardware test to confirm this behavior.
-
-        LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr);
-
-        const std::optional<GPUVAddr> new_gpu_addr{
-            FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)};
-
-        ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory");
-
-        gpu_addr = *new_gpu_addr;
-    }
-
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(gpu_addr + offset)};
-
-        ASSERT(slot == static_cast<u64>(PageStatus::Allocated));
-
-        slot = cpu_addr + offset;
-    }
-
-    const MappedRegion region{cpu_addr, gpu_addr, size};
-    mapped_regions.push_back(region);
+    ASSERT((gpu_addr & page_mask) == 0);
+
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+
+    MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);

    return gpu_addr;
}

GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
-    ASSERT((gpu_addr & PAGE_MASK) == 0);
-
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(gpu_addr + offset)};
-
-        ASSERT(slot != static_cast<u64>(PageStatus::Allocated) &&
-               slot != static_cast<u64>(PageStatus::Unmapped));
-
-        slot = static_cast<u64>(PageStatus::Unmapped);
-    }
-
-    // Delete the region mappings that are contained within the unmapped region
-    mapped_regions.erase(std::remove_if(mapped_regions.begin(), mapped_regions.end(),
-                                        [&](const MappedRegion& region) {
-                                            return region.gpu_addr <= gpu_addr &&
-                                                   region.gpu_addr + region.size < gpu_addr + size;
-                                        }),
-                         mapped_regions.end());
+    ASSERT((gpu_addr & page_mask) == 0);
+
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+    const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
+
+    Core::System::GetInstance().Renderer().Rasterizer().FlushAndInvalidateRegion(cache_addr,
+                                                                                 aligned_size);
+    UnmapRange(gpu_addr, aligned_size);

    return gpu_addr;
}

-GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const {
-    for (const auto& region : mapped_regions) {
-        const GPUVAddr region_end{region.gpu_addr + region.size};
-        if (region_start >= region.gpu_addr && region_start < region_end) {
-            return region_end;
-        }
-    }
-    return {};
-}
-
-std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
-                                                     PageStatus status) {
-    GPUVAddr gpu_addr{region_start};
-    u64 free_space{};
-    align = (align + PAGE_MASK) & ~PAGE_MASK;
-
-    while (gpu_addr + free_space < MAX_ADDRESS) {
-        if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) {
-            free_space += PAGE_SIZE;
-            if (free_space >= size) {
-                return gpu_addr;
-            }
-        } else {
-            gpu_addr += free_space + PAGE_SIZE;
-            free_space = 0;
-            gpu_addr = Common::AlignUp(gpu_addr, align);
-        }
-    }
-
-    return {};
-}
-
-std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
-    const VAddr base_addr{PageSlot(gpu_addr)};
-
-    if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
-        base_addr == static_cast<u64>(PageStatus::Unmapped) ||
-        base_addr == static_cast<u64>(PageStatus::Reserved)) {
-        return {};
-    }
-
-    return base_addr + (gpu_addr & PAGE_MASK);
-}
-
-u8 MemoryManager::Read8(GPUVAddr addr) {
-    return Memory::Read8(*GpuToCpuAddress(addr));
-}
-
-u16 MemoryManager::Read16(GPUVAddr addr) {
-    return Memory::Read16(*GpuToCpuAddress(addr));
-}
-
-u32 MemoryManager::Read32(GPUVAddr addr) {
-    return Memory::Read32(*GpuToCpuAddress(addr));
-}
-
-u64 MemoryManager::Read64(GPUVAddr addr) {
-    return Memory::Read64(*GpuToCpuAddress(addr));
-}
+GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) {
+    // Find the first Free VMA.
+    const VMAHandle vma_handle{std::find_if(vma_map.begin(), vma_map.end(), [&](const auto& vma) {
+        if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
+            return false;
+        }
+
+        const VAddr vma_end{vma.second.base + vma.second.size};
+        return vma_end > region_start && vma_end >= region_start + size;
+    })};
+
+    if (vma_handle == vma_map.end()) {
+        return {};
+    }
+
+    return std::max(region_start, vma_handle->second.base);
+}
+
+bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
+    return (addr >> page_bits) < page_table.pointers.size();
+}
+
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) {
+    if (!IsAddressValid(addr)) {
+        return {};
+    }
+
+    VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
+    if (cpu_addr) {
+        return cpu_addr + (addr & page_mask);
+    }
+
+    return {};
+}
+
+template <typename T>
+T MemoryManager::Read(GPUVAddr addr) {
+    if (!IsAddressValid(addr)) {
+        return {};
+    }
+
+    const u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    if (page_pointer) {
+        // NOTE: Avoid adding any extra logic to this fast-path block
+        T value;
+        std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
+        return value;
+    }
+
+    switch (page_table.attributes[addr >> page_bits]) {
+    case Common::PageType::Unmapped:
+        LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, addr);
+        return 0;
+    case Common::PageType::Memory:
+        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
+        break;
+    default:
+        UNREACHABLE();
+    }
+    return {};
+}
+
+template <typename T>
+void MemoryManager::Write(GPUVAddr addr, T data) {
+    if (!IsAddressValid(addr)) {
+        return;
+    }
+
+    u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    if (page_pointer) {
+        // NOTE: Avoid adding any extra logic to this fast-path block
+        std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
+        return;
+    }
+
+    switch (page_table.attributes[addr >> page_bits]) {
+    case Common::PageType::Unmapped:
+        LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
+                  static_cast<u32>(data), addr);
+        return;
+    case Common::PageType::Memory:
+        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
+        break;
+    default:
+        UNREACHABLE();
+    }
+}

-void MemoryManager::Write8(GPUVAddr addr, u8 data) {
-    Memory::Write8(*GpuToCpuAddress(addr), data);
-}
-
-void MemoryManager::Write16(GPUVAddr addr, u16 data) {
-    Memory::Write16(*GpuToCpuAddress(addr), data);
-}
-
-void MemoryManager::Write32(GPUVAddr addr, u32 data) {
-    Memory::Write32(*GpuToCpuAddress(addr), data);
-}
-
-void MemoryManager::Write64(GPUVAddr addr, u64 data) {
-    Memory::Write64(*GpuToCpuAddress(addr), data);
-}
+template u8 MemoryManager::Read<u8>(GPUVAddr addr);
+template u16 MemoryManager::Read<u16>(GPUVAddr addr);
+template u32 MemoryManager::Read<u32>(GPUVAddr addr);
+template u64 MemoryManager::Read<u64>(GPUVAddr addr);
+template void MemoryManager::Write<u8>(GPUVAddr addr, u8 data);
+template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
+template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
+template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
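The eight `template ...` lines above are explicit instantiations: Read and Write are defined in the .cpp file rather than the header, so this translation unit must emit the exact instantiations other files link against. A minimal reproduction of the pattern, with illustrative names rather than the actual yuzu sources:

// reader.h -- declaration only; the template definition is deliberately hidden.
#include <cstddef>
template <typename T>
T ReadValue(const unsigned char* base, std::size_t offset);

// reader.cpp -- definition plus the instantiations callers are allowed to use.
#include <cstring>
template <typename T>
T ReadValue(const unsigned char* base, std::size_t offset) {
    T value;
    std::memcpy(&value, base + offset, sizeof(T)); // well-defined unaligned load
    return value;
}
// Without these lines, a use of ReadValue<unsigned> in another translation
// unit would compile but fail at link time with an undefined reference.
template unsigned ReadValue<unsigned>(const unsigned char*, std::size_t);
template unsigned long long ReadValue<unsigned long long>(const unsigned char*, std::size_t);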
|
||||
u8* MemoryManager::GetPointer(GPUVAddr addr) {
|
||||
return Memory::GetPointer(*GpuToCpuAddress(addr));
|
||||
if (!IsAddressValid(addr)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
u8* page_pointer{page_table.pointers[addr >> page_bits]};
|
||||
if (page_pointer) {
|
||||
return page_pointer + (addr & page_mask);
|
||||
}
|
||||
|
||||
LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
|
||||
return {};
|
||||
}
|
||||
|
||||
void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
|
||||
@@ -210,13 +199,252 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t
|
||||
std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
|
||||
}
|
||||
|
||||
VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
|
||||
auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]};
|
||||
if (!block) {
|
||||
block = std::make_unique<PageBlock>();
|
||||
block->fill(static_cast<VAddr>(PageStatus::Unmapped));
|
||||
void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
|
||||
VAddr backing_addr) {
|
||||
LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
|
||||
(base + size) * page_size);
|
||||
|
||||
const VAddr end{base + size};
|
||||
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
|
||||
base + page_table.pointers.size());
|
||||
|
||||
std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
|
||||
|
||||
if (memory == nullptr) {
|
||||
std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
|
||||
std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end,
|
||||
backing_addr);
|
||||
} else {
|
||||
while (base != end) {
|
||||
page_table.pointers[base] = memory;
|
||||
page_table.backing_addr[base] = backing_addr;
|
||||
|
||||
base += 1;
|
||||
memory += page_size;
|
||||
backing_addr += page_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) {
|
||||
ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
|
||||
ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
|
||||
MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr);
|
||||
}
|
||||
|
||||
void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) {
|
||||
ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
|
||||
ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
|
||||
MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped);
|
||||
}
|
||||
|
||||
bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
|
||||
ASSERT(base + size == next.base);
|
||||
if (type != next.type) {
|
||||
return {};
|
||||
}
|
||||
if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) {
|
||||
return {};
|
||||
}
|
||||
if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) {
|
||||
return {};
|
||||
}
|
||||
return true;
|
||||
}
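A worked example of the merge test, with hypothetical values: two adjacent Mapped VMAs are mergeable only when their host backing is contiguous as well.

    VirtualMemoryArea left{};
    left.base = 0x100000;
    left.size = 0x2000;
    left.type = VirtualMemoryArea::Type::Mapped;
    left.backing_memory = host_base; // hypothetical host allocation

    VirtualMemoryArea right{left};
    right.base = left.base + left.size;
    right.backing_memory = host_base + left.size; // contiguous, so merging is allowed

    // left.CanBeMergedWith(right) == true; offset right.backing_memory and it becomes false.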
MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const {
    if (target >= address_space_end) {
        return vma_map.end();
    } else {
        return std::prev(vma_map.upper_bound(target));
    }
}

MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) {
    VirtualMemoryArea& vma{vma_handle->second};

    vma.type = VirtualMemoryArea::Type::Allocated;
    vma.backing_addr = 0;
    vma.backing_memory = {};
    UpdatePageTableForVMA(vma);

    return MergeAdjacent(vma_handle);
}

MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset,
                                                       u64 size) {

    // This is the appropriately sized VMA that will turn into our allocation.
    VMAIter vma_handle{CarveVMA(target, size)};
    VirtualMemoryArea& vma{vma_handle->second};

    ASSERT(vma.size == size);

    vma.offset = offset;

    return Allocate(vma_handle);
}

MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size,
                                                         VAddr backing_addr) {
    // This is the appropriately sized VMA that will turn into our allocation.
    VMAIter vma_handle{CarveVMA(target, size)};
    VirtualMemoryArea& vma{vma_handle->second};

    ASSERT(vma.size == size);

    vma.type = VirtualMemoryArea::Type::Mapped;
    vma.backing_memory = memory;
    vma.backing_addr = backing_addr;
    UpdatePageTableForVMA(vma);

    return MergeAdjacent(vma_handle);
}

void MemoryManager::UnmapRange(GPUVAddr target, u64 size) {
    VMAIter vma{CarveVMARange(target, size)};
    const VAddr target_end{target + size};
    const VMAIter end{vma_map.end()};

    // The comparison against the end of the range must be done using addresses since VMAs can be
    // merged during this process, causing invalidation of the iterators.
    while (vma != end && vma->second.base < target_end) {
        // Unmapped ranges return to allocated state and can be reused
        // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games
        vma = std::next(Allocate(vma));
    }

    ASSERT(FindVMA(target)->second.size >= size);
}

MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) {
    // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given
    // non-const access to its container.
    return vma_map.erase(iter, iter); // Erases an empty range of elements
}
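The same const_iterator-to-iterator conversion in isolation, as a self-contained sketch:

    #include <map>

    std::map<int, int> container{{1, 10}};
    std::map<int, int>::const_iterator cit = container.cbegin();
    // Erasing an empty range removes nothing, but since C++11 map::erase accepts
    // const_iterator arguments and returns a mutable iterator.
    std::map<int, int>::iterator it = container.erase(cit, cit);
    it->second = 42; // mutation is now allowed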
MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base);

    VMAIter vma_handle{StripIterConstness(FindVMA(base))};
    if (vma_handle == vma_map.end()) {
        // Target address is outside the managed range
        return {};
    }

    const VirtualMemoryArea& vma{vma_handle->second};
    if (vma.type == VirtualMemoryArea::Type::Mapped) {
        // Region is already allocated
        return {};
    }

    const VAddr start_in_vma{base - vma.base};
    const VAddr end_in_vma{start_in_vma + size};

    ASSERT_MSG(end_in_vma <= vma.size, "region size 0x{:016X} is less than required size 0x{:016X}",
               vma.size, end_in_vma);

    if (end_in_vma < vma.size) {
        // Split VMA at the end of the allocated region
        SplitVMA(vma_handle, end_in_vma);
    }
    if (start_in_vma != 0) {
        // Split VMA at the start of the allocated region
        vma_handle = SplitVMA(vma_handle, start_in_vma);
    }

    return vma_handle;
}

MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) {
    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
    ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target);

    const VAddr target_end{target + size};
    ASSERT(target_end >= target);
    ASSERT(size > 0);

    VMAIter begin_vma{StripIterConstness(FindVMA(target))};
    const VMAIter i_end{vma_map.lower_bound(target_end)};
    if (std::any_of(begin_vma, i_end, [](const auto& entry) {
            return entry.second.type == VirtualMemoryArea::Type::Unmapped;
        })) {
        return {};
    }

    if (target != begin_vma->second.base) {
        begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
    }

    VMAIter end_vma{StripIterConstness(FindVMA(target_end))};
    if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
        end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
    }

    return begin_vma;
}

MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
    VirtualMemoryArea& old_vma{vma_handle->second};
    VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA

    // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably
    // a bug. This restriction might be removed later.
    ASSERT(offset_in_vma < old_vma.size);
    ASSERT(offset_in_vma > 0);

    old_vma.size = offset_in_vma;
    new_vma.base += offset_in_vma;
    new_vma.size -= offset_in_vma;

    switch (new_vma.type) {
    case VirtualMemoryArea::Type::Unmapped:
        break;
    case VirtualMemoryArea::Type::Allocated:
        new_vma.offset += offset_in_vma;
        break;
    case VirtualMemoryArea::Type::Mapped:
        new_vma.backing_memory += offset_in_vma;
        break;
    }

    ASSERT(old_vma.CanBeMergedWith(new_vma));

    return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
}
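For instance, splitting a 0x3000-byte Allocated VMA at offset 0x1000 (hypothetical numbers) leaves the original map entry as the left half and inserts the right half directly after it:

    // before:                  [base=0x100000, size=0x3000, offset=0x0]
    // after SplitVMA(it, 0x1000):
    //   left (same entry):     [base=0x100000, size=0x1000, offset=0x0]
    //   right (new entry):     [base=0x101000, size=0x2000, offset=0x1000]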
MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) {
    const VMAIter next_vma{std::next(iter)};
    if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
        iter->second.size += next_vma->second.size;
        vma_map.erase(next_vma);
    }

    if (iter != vma_map.begin()) {
        VMAIter prev_vma{std::prev(iter)};
        if (prev_vma->second.CanBeMergedWith(iter->second)) {
            prev_vma->second.size += iter->second.size;
            vma_map.erase(iter);
            iter = prev_vma;
        }
    }

    return iter;
}

void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
    switch (vma.type) {
    case VirtualMemoryArea::Type::Unmapped:
        UnmapRegion(vma.base, vma.size);
        break;
    case VirtualMemoryArea::Type::Allocated:
        MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr);
        break;
    case VirtualMemoryArea::Type::Mapped:
        MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr);
        break;
    }
    return (*block)[(gpu_addr >> PAGE_BITS) & PAGE_BLOCK_MASK];
}

} // namespace Tegra
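A hedged sketch of how the pieces above compose internally (call sequence only; the names match the functions in this file, the arguments are illustrative): mapping carves a VMA, fills the page table, and merges neighbors, while unmapping reverts the range to Allocated.

    // Inside MemoryManager, assuming page-aligned gpu_addr and size:
    MapBackingMemory(gpu_addr, host_memory, size, cpu_addr); // CarveVMA -> Mapped -> MergeAdjacent
    UnmapRange(gpu_addr, size);                              // CarveVMARange -> Allocate per VMA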
@@ -1,82 +1,148 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <memory>
#include <map>
#include <optional>
#include <vector>

#include "common/common_types.h"
#include "common/page_table.h"

namespace Tegra {

/// Virtual addresses in the GPU's memory map are 64 bit.
using GPUVAddr = u64;

/**
 * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space
 * with homogeneous attributes across its extents. In this particular implementation each VMA is
 * also backed by a single host memory allocation.
 */
struct VirtualMemoryArea {
    enum class Type : u8 {
        Unmapped,
        Allocated,
        Mapped,
    };

    /// Virtual base address of the region.
    GPUVAddr base{};
    /// Size of the region.
    u64 size{};
    /// Memory area mapping type.
    Type type{Type::Unmapped};
    /// CPU memory mapped address corresponding to this memory area.
    VAddr backing_addr{};
    /// Offset into the backing_memory the mapping starts from.
    std::size_t offset{};
    /// Pointer backing this VMA.
    u8* backing_memory{};

    /// Tests if this area can be merged to the right with `next`.
    bool CanBeMergedWith(const VirtualMemoryArea& next) const;
};

class MemoryManager final {
public:
    MemoryManager();

    GPUVAddr AllocateSpace(u64 size, u64 align);
    GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
    GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
    GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
    GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
    GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
    GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
    std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
    GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
    GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
    std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr);

    static constexpr u64 PAGE_BITS = 16;
    static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
    static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
    template <typename T>
    T Read(GPUVAddr addr);

    u8 Read8(GPUVAddr addr);
    u16 Read16(GPUVAddr addr);
    u32 Read32(GPUVAddr addr);
    u64 Read64(GPUVAddr addr);
    template <typename T>
    void Write(GPUVAddr addr, T data);

    void Write8(GPUVAddr addr, u8 data);
    void Write16(GPUVAddr addr, u16 data);
    void Write32(GPUVAddr addr, u32 data);
    void Write64(GPUVAddr addr, u64 data);

    u8* GetPointer(GPUVAddr vaddr);
    u8* GetPointer(GPUVAddr addr);

    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size);
    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
    void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);

private:
    enum class PageStatus : u64 {
        Unmapped = 0xFFFFFFFFFFFFFFFFULL,
        Allocated = 0xFFFFFFFFFFFFFFFEULL,
        Reserved = 0xFFFFFFFFFFFFFFFDULL,
    };
    using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
    using VMAHandle = VMAMap::const_iterator;
    using VMAIter = VMAMap::iterator;

    std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
                                          PageStatus status);
    VAddr& PageSlot(GPUVAddr gpu_addr);
    bool IsAddressValid(GPUVAddr addr) const;
    void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
                  VAddr backing_addr = 0);
    void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr);
    void UnmapRegion(GPUVAddr base, u64 size);

    static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
    static constexpr u64 PAGE_TABLE_BITS{10};
    static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};
    static constexpr u64 PAGE_TABLE_MASK{PAGE_TABLE_SIZE - 1};
    static constexpr u64 PAGE_BLOCK_BITS{14};
    static constexpr u64 PAGE_BLOCK_SIZE{1 << PAGE_BLOCK_BITS};
    static constexpr u64 PAGE_BLOCK_MASK{PAGE_BLOCK_SIZE - 1};
    /// Finds the VMA that contains the given address, or `vma_map.end()` if none does.
    VMAHandle FindVMA(GPUVAddr target) const;

    using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>;
    std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{};
    VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size);

    struct MappedRegion {
        VAddr cpu_addr;
        GPUVAddr gpu_addr;
        u64 size;
    };
    /**
     * Maps an unmanaged host memory pointer at a given address.
     *
     * @param target The guest address to start the mapping at.
     * @param memory The memory to be mapped.
     * @param size Size of the mapping.
     * @param backing_addr The base CPU address that backs the mapping.
     */
    VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);

    std::vector<MappedRegion> mapped_regions;
    /// Unmaps a range of addresses, splitting VMAs as necessary.
    void UnmapRange(GPUVAddr target, u64 size);

    /// Converts a VMAHandle to a mutable VMAIter.
    VMAIter StripIterConstness(const VMAHandle& iter);

    /// Marks the specified VMA as allocated.
    VMAIter Allocate(VMAIter vma);

    /**
     * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
     * the appropriate error checking.
     */
    VMAIter CarveVMA(GPUVAddr base, u64 size);

    /**
     * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
     * end of the range.
     */
    VMAIter CarveVMARange(GPUVAddr base, u64 size);

    /**
     * Splits a VMA in two, at the specified offset.
     * @returns the right side of the split, with the original iterator becoming the left side.
     */
    VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma);

    /**
     * Checks for and merges the specified VMA with adjacent ones if possible.
     * @returns the merged VMA or the original if no merging was possible.
     */
    VMAIter MergeAdjacent(VMAIter vma);

    /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
    void UpdatePageTableForVMA(const VirtualMemoryArea& vma);

    /// Finds a free (unmapped) region of the specified size starting at the specified address.
    GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size);

private:
    static constexpr u64 page_bits{16};
    static constexpr u64 page_size{1 << page_bits};
    static constexpr u64 page_mask{page_size - 1};

    /// Address space width in bits; fairly arbitrary but sufficiently large.
    static constexpr u32 address_space_width{39};
    /// Start address for mapping; fairly arbitrary but must be non-zero.
    static constexpr GPUVAddr address_space_base{0x100000};
    /// End of address space, based on address space width in bits.
    static constexpr GPUVAddr address_space_end{1ULL << address_space_width};

    Common::PageTable page_table{page_bits};
    VMAMap vma_map;
};

} // namespace Tegra
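A short sketch of the public surface as the call sites below use it (the variable names are illustrative):

    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
    const auto actual_addr{memory_manager.Read<u64>(addr)}; // templated read
    const auto size{memory_manager.Read<u32>(addr + 8)};
    u8* host_ptr{memory_manager.GetPointer(actual_addr)};   // direct host pointer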
@@ -9,7 +9,6 @@
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"

namespace VideoCore {

@@ -21,8 +21,8 @@ CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
    : RasterizerCache{rasterizer}, stream_buffer(size, true) {}

GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
                                      std::size_t alignment, bool cache) {
GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
                                      bool cache) {
    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();

    // Cache management is a big overhead, so only cache entries with a given size.

@@ -58,7 +58,7 @@ public:

    /// Uploads data from a guest GPU address. Returns the host buffer offset where it has been
    /// allocated.
    GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
    GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
                          bool cache = true);

    /// Uploads from host memory. Returns the host buffer offset where it has been allocated.

@@ -46,7 +46,7 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr,
    return search->second;
}

GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size,
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
                                                              u8* host_ptr) {
    GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
    if (!region) {

@@ -76,8 +76,8 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
    const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
    const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
                    global_region.GetCbufOffset()};
    const auto actual_addr{memory_manager.Read64(addr)};
    const auto size{memory_manager.Read32(addr + 8)};
    const auto actual_addr{memory_manager.Read<u64>(addr)};
    const auto size{memory_manager.Read<u32>(addr + 8)};

    // Look up global region in the cache based on address
    const auto& host_ptr{memory_manager.GetPointer(actual_addr)};

@@ -66,7 +66,7 @@ public:

private:
    GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
    GlobalRegion GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size, u8* host_ptr);
    GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
    void ReserveGlobalRegion(GlobalRegion region);

    std::unordered_map<CacheAddr, GlobalRegion> reserve;

@@ -40,8 +40,7 @@ GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
    return index_offset;
}

GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size,
                                             u32 count) {
GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) {
    const std::size_t map_size{CalculateQuadSize(count)};
    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);

@@ -24,7 +24,7 @@ public:

    GLintptr MakeQuadArray(u32 first, u32 count);

    GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count);
    GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count);

private:
    OGLBufferCache& buffer_cache;

@@ -225,8 +225,8 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
        if (!vertex_array.IsEnabled())
            continue;

        const Tegra::GPUVAddr start = vertex_array.StartAddress();
        const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
        const GPUVAddr start = vertex_array.StartAddress();
        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();

        ASSERT(end > start);
        const u64 size = end - start + 1;

@@ -421,8 +421,8 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
        if (!regs.vertex_array[index].IsEnabled())
            continue;

        const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress();
        const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
        const GPUVAddr start = regs.vertex_array[index].StartAddress();
        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();

        ASSERT(end > start);
        size += end - start + 1;

@@ -55,7 +55,7 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
    }
}

void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) {
    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};

    gpu_addr = gpu_addr_;

@@ -222,7 +222,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
}

/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
    u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
    u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
    u32 block_width, u32 block_height, u32 block_depth,
    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
    SurfaceParams params{};

@@ -564,6 +564,12 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
CachedSurface::CachedSurface(const SurfaceParams& params)
    : params{params}, gl_target{SurfaceTargetToGL(params.target)},
      cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} {

    const auto optional_cpu_addr{
        Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
    ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
    cpu_addr = *optional_cpu_addr;

    texture.Create(gl_target);

    // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)

@@ -603,20 +609,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
    ApplyTextureDefaults(texture.handle, params.max_mip_level);

    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());

    // Clamp size to mapped GPU memory region
    // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
    // R32F render buffer. We do not yet know if this is a game bug or something else, but this
    // check is necessary to prevent flushing from overwriting unmapped memory.

    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
    const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr};
    if (cached_size_in_bytes > max_size) {
        LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
        cached_size_in_bytes = max_size;
    }

    cpu_addr = *memory_manager.GpuToCpuAddress(params.gpu_addr);
}

MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));

@@ -925,7 +917,7 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
}

Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
    if (params.gpu_addr == 0 || params.height * params.width == 0) {
    if (!params.IsValid()) {
        return {};
    }

@@ -980,11 +972,11 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
    const auto& init_params{src_surface->GetSurfaceParams()};
    const auto& dst_params{dst_surface->GetSurfaceParams()};
    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
    Tegra::GPUVAddr address{init_params.gpu_addr};
    GPUVAddr address{init_params.gpu_addr};
    const std::size_t layer_size{dst_params.LayerMemorySize()};
    for (u32 layer = 0; layer < dst_params.depth; layer++) {
        for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
            const Tegra::GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
            const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
            const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
            if (!copy) {
                continue;

@@ -1244,10 +1236,9 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar
    return {};
}

static std::optional<u32> TryFindBestLayer(Tegra::GPUVAddr addr, const SurfaceParams params,
                                           u32 mipmap) {
static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) {
    const std::size_t size{params.LayerMemorySize()};
    Tegra::GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
    GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
    for (u32 i = 0; i < params.depth; i++) {
        if (start == addr) {
            return {i};

@@ -109,6 +109,11 @@ struct SurfaceParams {
        return size;
    }

    /// Returns true if the parameters constitute a valid rasterizer surface.
    bool IsValid() const {
        return gpu_addr && host_ptr && height && width;
    }

    /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
    /// mipmaps.
    std::size_t LayerMemorySize() const {

@@ -210,7 +215,7 @@ struct SurfaceParams {

    /// Creates SurfaceParams for a depth buffer configuration
    static SurfaceParams CreateForDepthBuffer(
        u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
        u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
        u32 block_width, u32 block_height, u32 block_depth,
        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);

@@ -232,7 +237,7 @@ struct SurfaceParams {
    }

    /// Initializes parameters for caching; should be called after everything else is initialized.
    void InitCacheParameters(Tegra::GPUVAddr gpu_addr);
    void InitCacheParameters(GPUVAddr gpu_addr);

    std::string TargetName() const {
        switch (target) {

@@ -297,7 +302,7 @@ struct SurfaceParams {
    bool srgb_conversion;
    // Parameters used for caching
    u8* host_ptr;
    Tegra::GPUVAddr gpu_addr;
    GPUVAddr gpu_addr;
    std::size_t size_in_bytes;
    std::size_t size_in_bytes_gl;

@@ -32,7 +32,7 @@ struct UnspecializedShader {
namespace {

/// Gets the address for the specified shader stage program
Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
    const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
    return gpu.regs.code_address.CodeAddress() + shader_config.offset;

@@ -486,7 +486,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
    }

    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
    const Tegra::GPUVAddr program_addr{GetShaderAddress(program)};
    const GPUVAddr program_addr{GetShaderAddress(program)};

    // Look up shader in the cache based on address
    const auto& host_ptr{memory_manager.GetPointer(program_addr)};

@@ -39,8 +39,7 @@ VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,

VKBufferCache::~VKBufferCache() = default;

u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
                                bool cache) {
u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
    const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
    ASSERT_MSG(cpu_addr, "Invalid GPU address");

@@ -68,8 +68,7 @@ public:

    /// Uploads data from a guest GPU address. Returns the host buffer offset where it has been
    /// allocated.
    u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
                     bool cache = true);
    u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);

    /// Uploads from host memory. Returns the host buffer offset where it has been allocated.
    u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);

@@ -261,7 +261,7 @@ void GraphicsSurfaceWidget::OnSurfaceSourceChanged(int new_value) {

void GraphicsSurfaceWidget::OnSurfaceAddressChanged(qint64 new_value) {
    if (surface_address != new_value) {
        surface_address = static_cast<Tegra::GPUVAddr>(new_value);
        surface_address = static_cast<GPUVAddr>(new_value);

        surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom));
        emit Update();

@@ -87,7 +87,7 @@ private:
    QPushButton* save_surface;

    Source surface_source;
    Tegra::GPUVAddr surface_address;
    GPUVAddr surface_address;
    unsigned surface_width;
    unsigned surface_height;
    Tegra::Texture::TextureFormat surface_format;