fsp_srv: stub GetCacheStorageSize

Merge pull request #9016 from liamwhite/drunken-schedule
vk_scheduler: wait for command processing to complete
2022-10-08 12:24:00 -04:00 · 2022-10-07 20:27:16 -04:00 · 2022-10-07 20:25:51 -04:00 · 2022-10-07 20:25:23 -04:00 · 2022-10-07 17:39:39 -04:00 · 2022-10-07 15:11:26 -04:00
205 changed files with 6860 additions and 3380 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -252,7 +252,7 @@ if(ENABLE_QT)
            endif()

            # Check for headers
-            Include(FindPkgConfig REQUIRED)
+            find_package(PkgConfig REQUIRED)
            pkg_check_modules(QT_DEP_GLU QUIET glu>=9.0.0)
            if (NOT QT_DEP_GLU_FOUND)
                message(FATAL_ERROR "Qt bundled pacakge dependency `glu` not found. \
@@ -386,7 +386,7 @@ endif()

 # Ensure libusb is properly configured (based on dolphin libusb include)
 if(NOT APPLE AND NOT YUZU_USE_BUNDLED_LIBUSB)
-    include(FindPkgConfig)
+    find_package(PkgConfig)
    if (PKG_CONFIG_FOUND AND NOT CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD")
        pkg_check_modules(LIBUSB QUIET libusb-1.0>=1.0.24)
    else()
@@ -410,7 +410,7 @@ set(FFmpeg_COMPONENTS
    swscale)

 if (UNIX AND NOT APPLE)
-    Include(FindPkgConfig REQUIRED)
+    find_package(PkgConfig REQUIRED)
    pkg_check_modules(LIBVA libva)
 endif()
 if (NOT YUZU_USE_BUNDLED_FFMPEG)
--- a/externals/ffmpeg/CMakeLists.txt
+++ b/externals/ffmpeg/CMakeLists.txt
@@ -43,7 +43,7 @@ if (NOT WIN32)
            CACHE PATH "Paths to FFmpeg libraries" FORCE)
    endforeach()

-    Include(FindPkgConfig REQUIRED)
+    find_package(PkgConfig REQUIRED)
    pkg_check_modules(LIBVA libva)
    pkg_check_modules(CUDA cuda)
    pkg_check_modules(FFNVCODEC ffnvcodec)
--- a/externals/libusb/CMakeLists.txt
+++ b/externals/libusb/CMakeLists.txt
@@ -108,7 +108,7 @@ if (MINGW OR (${CMAKE_SYSTEM_NAME} MATCHES "Linux") OR APPLE)
    target_include_directories(usb INTERFACE "${LIBUSB_INCLUDE_DIRS}")

    if (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
-        Include(FindPkgConfig)
+        find_package(PkgConfig)
        pkg_check_modules(LIBUDEV REQUIRED libudev)

        if (LIBUDEV_FOUND)
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -121,6 +121,7 @@ else()

    if (ARCHITECTURE_x86_64)
        add_compile_options("-mcx16")
+        add_compile_options("-fwrapv")
    endif()

    if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
--- a/src/audio_core/renderer/adsp/audio_renderer.cpp
+++ b/src/audio_core/renderer/adsp/audio_renderer.cpp
@@ -132,7 +132,7 @@ void AudioRenderer::CreateSinkStreams() {
 }

 void AudioRenderer::ThreadFunc() {
-    constexpr char name[]{"yuzu:AudioRenderer"};
+    constexpr char name[]{"AudioRenderer"};
    MicroProfileOnThreadCreate(name);
    Common::SetCurrentThreadName(name);
    Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);
--- a/src/audio_core/renderer/system_manager.cpp
+++ b/src/audio_core/renderer/system_manager.cpp
@@ -94,7 +94,7 @@ bool SystemManager::Remove(System& system_) {
 }

 void SystemManager::ThreadFunc() {
-    constexpr char name[]{"yuzu:AudioRenderSystemManager"};
+    constexpr char name[]{"AudioRenderSystemManager"};
    MicroProfileOnThreadCreate(name);
    Common::SetCurrentThreadName(name);
    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -17,6 +17,8 @@ endif ()
 include(GenerateSCMRev)

 add_library(common STATIC
+    address_space.cpp
+    address_space.h
    algorithm.h
    alignment.h
    announce_multiplayer_room.h
@@ -81,6 +83,8 @@ add_library(common STATIC
    microprofile.cpp
    microprofile.h
    microprofileui.h
+    multi_level_page_table.cpp
+    multi_level_page_table.h
    nvidia_flags.cpp
    nvidia_flags.h
    page_table.cpp
--- a/src/common/address_space.cpp
+++ b/src/common/address_space.cpp
@@ -0,0 +1,10 @@
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common/address_space.inc"
+
+namespace Common {
+
+template class Common::FlatAllocator<u32, 0, 32>;
+
+}
--- a/src/common/address_space.h
+++ b/src/common/address_space.h
@@ -0,0 +1,150 @@
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <concepts>
+#include <functional>
+#include <mutex>
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Common {
+template <typename VaType, size_t AddressSpaceBits>
+concept AddressSpaceValid = std::is_unsigned_v<VaType> && sizeof(VaType) * 8 >= AddressSpaceBits;
+
+struct EmptyStruct {};
+
+/**
+ * @brief FlatAddressSpaceMap provides a generic VA->PA mapping implementation using a sorted vector
+ */
+template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa,
+          bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo = EmptyStruct>
+requires AddressSpaceValid<VaType, AddressSpaceBits>
+class FlatAddressSpaceMap {
+public:
+    /// The maximum VA that this AS can technically reach, i.e. 2^AddressSpaceBits - 1
+    static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) +
+                                      ((1ULL << (AddressSpaceBits - 1)) - 1)};
+
+    explicit FlatAddressSpaceMap(VaType va_limit,
+                                 std::function<void(VaType, VaType)> unmap_callback = {});
+
+    FlatAddressSpaceMap() = default;
+
+    void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info = {}) {
+        std::scoped_lock lock(block_mutex);
+        MapLocked(virt, phys, size, extra_info);
+    }
+
+    void Unmap(VaType virt, VaType size) {
+        std::scoped_lock lock(block_mutex);
+        UnmapLocked(virt, size);
+    }
+
+    VaType GetVALimit() const {
+        return va_limit;
+    }
+
+protected:
+    /**
+     * @brief Represents a block of memory in the AS, the physical mapping is contiguous until
+     * another block with a different phys address is hit
+     */
+    struct Block {
+        /// VA of the block
+        VaType virt{UnmappedVa};
+        /// PA of the block, will increase 1-1 with VA until a new block is encountered
+        PaType phys{UnmappedPa};
+        [[no_unique_address]] ExtraBlockInfo extra_info;
+
+        Block() = default;
+
+        Block(VaType virt_, PaType phys_, ExtraBlockInfo extra_info_)
+            : virt(virt_), phys(phys_), extra_info(extra_info_) {}
+
+        bool Valid() const {
+            return virt != UnmappedVa;
+        }
+
+        bool Mapped() const {
+            return phys != UnmappedPa;
+        }
+
+        bool Unmapped() const {
+            return phys == UnmappedPa;
+        }
+        /// Compares a block against a bare VA so std::lower_bound can search blocks by address
+        bool operator<(const VaType& p_virt) const {
+            return virt < p_virt;
+        }
+    };
+
+    /**
+     * @brief Maps a PA range into the given AS region
+     * @note block_mutex MUST be locked when calling this
+     */
+    void MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info);
+
+    /**
+     * @brief Unmaps the given range and merges it with other unmapped regions
+     * @note block_mutex MUST be locked when calling this
+     */
+    void UnmapLocked(VaType virt, VaType size);
+
+    std::mutex block_mutex;
+    std::vector<Block> blocks{Block{}};
+
+    /// Soft limit on the maximum VA of the AS, checked by MapLocked and UnmapLocked
+    VaType va_limit{VaMaximum};
+
+private:
+    /// Callback invoked with (virt, size) whenever the mappings in a region have changed
+    std::function<void(VaType, VaType)> unmap_callback{};
+};
+
+/**
+ * @brief FlatAllocator specialises FlatAddressSpaceMap to work as an allocator, with an
+ * initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block
+ */
+template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>
+requires AddressSpaceValid<VaType, AddressSpaceBits>
+class FlatAllocator
+    : public FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits> {
+private:
+    using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>;
+
+public:
+    explicit FlatAllocator(VaType virt_start, VaType va_limit = Base::VaMaximum);
+
+    /**
+     * @brief Allocates a region of the given size and returns its address (VaType{} when full)
+     */
+    VaType Allocate(VaType size);
+
+    /**
+     * @brief Marks the given region in the AS as allocated
+     */
+    void AllocateFixed(VaType virt, VaType size);
+
+    /**
+     * @brief Frees an AS region so it can be used again
+     */
+    void Free(VaType virt, VaType size);
+
+    VaType GetVAStart() const {
+        return virt_start;
+    }
+
+private:
+    /// The base VA of the allocator, the linear allocation pass starts from this address
+    VaType virt_start;
+
+    /**
+     * The end address for the initial linear allocation pass
+     * Once this reaches the AS limit the slower allocation path will be used
+     */
+    VaType current_linear_alloc_end;
+};
+} // namespace Common
--- a/src/common/address_space.inc
+++ b/src/common/address_space.inc
@@ -0,0 +1,366 @@
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common/address_space.h"
+#include "common/assert.h"
+
+#define MAP_MEMBER(returnType)                                                                     \
+    template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa,              \
+              bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo>                \
+    requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap<           \
+        VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
+#define MAP_MEMBER_CONST()                                                                         \
+    template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa,              \
+              bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo>                \
+    requires AddressSpaceValid<VaType, AddressSpaceBits> FlatAddressSpaceMap<                      \
+        VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
+
+#define MM_MEMBER(returnType)                                                                      \
+    template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>                         \
+    requires AddressSpaceValid<VaType, AddressSpaceBits> returnType                                \
+        FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits>
+
+#define ALLOC_MEMBER(returnType)                                                                   \
+    template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>                         \
+    requires AddressSpaceValid<VaType, AddressSpaceBits> returnType                                \
+        FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
+#define ALLOC_MEMBER_CONST()                                                                       \
+    template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>                         \
+    requires AddressSpaceValid<VaType, AddressSpaceBits>                                           \
+        FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
+
+namespace Common {
+MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType va_limit_,
+                                        std::function<void(VaType, VaType)> unmap_callback_)
+    : va_limit{va_limit_}, unmap_callback{std::move(unmap_callback_)} {
+    if (va_limit > VaMaximum) {
+        ASSERT_MSG(false, "Invalid VA limit!");
+    }
+}
+
+MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info) {
+    VaType virt_end{virt + size};
+
+    if (virt_end > va_limit) {
+        ASSERT_MSG(false,
+                   "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}",
+                   virt_end, va_limit);
+    }
+
+    auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)};
+    if (block_end_successor == blocks.begin()) {
+        ASSERT_MSG(false, "Trying to map a block before the VA start: virt_end: 0x{:X}", virt_end);
+    }
+
+    auto block_end_predecessor{std::prev(block_end_successor)};
+
+    if (block_end_successor != blocks.end()) {
+        // We have blocks in front of us, if one is directly in front then we don't have to add a
+        // tail
+        if (block_end_successor->virt != virt_end) {
+            PaType tailPhys{[&]() -> PaType {
+                if constexpr (!PaContigSplit) {
+                    // Always propagate unmapped regions rather than calculating offset
+                    return block_end_predecessor->phys;
+                } else {
+                    if (block_end_predecessor->Unmapped()) {
+                        // Always propagate unmapped regions rather than calculating offset
+                        return block_end_predecessor->phys;
+                    } else {
+                        return block_end_predecessor->phys + virt_end - block_end_predecessor->virt;
+                    }
+                }
+            }()};
+
+            if (block_end_predecessor->virt >= virt) {
+                // If this block's start would be overlapped by the map then reuse it as a tail
+                // block
+                block_end_predecessor->virt = virt_end;
+                block_end_predecessor->phys = tailPhys;
+                // extra_info is left as is: the reused tail still describes the old region
+
+                // No longer predecessor anymore
+                block_end_successor = block_end_predecessor--;
+            } else {
+                // Else insert a new one and we're done
+                blocks.insert(block_end_successor,
+                              {Block(virt, phys, extra_info),
+                               Block(virt_end, tailPhys, block_end_predecessor->extra_info)});
+                if (unmap_callback) {
+                    unmap_callback(virt, size);
+                }
+
+                return;
+            }
+        }
+    } else {
+        // block_end_predecessor will always be unmapped as blocks has to be terminated by an
+        // unmapped chunk
+        if (block_end_predecessor != blocks.begin() && block_end_predecessor->virt >= virt) {
+            // Move the unmapped block start backwards
+            block_end_predecessor->virt = virt_end;
+
+            // No longer predecessor anymore
+            block_end_successor = block_end_predecessor--;
+        } else {
+            // Else insert a new one and we're done
+            blocks.insert(block_end_successor,
+                          {Block(virt, phys, extra_info), Block(virt_end, UnmappedPa, {})});
+            if (unmap_callback) {
+                unmap_callback(virt, size);
+            }
+
+            return;
+        }
+    }
+
+    auto block_start_successor{block_end_successor};
+
+    // Walk the block vector to find the start successor as this is more efficient than another
+    // binary search in most scenarios
+    while (std::prev(block_start_successor)->virt >= virt) {
+        block_start_successor--;
+    }
+
+    // Check that the start successor is either the end block or something in between
+    if (block_start_successor->virt > virt_end) {
+        ASSERT_MSG(false, "Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt);
+    } else if (block_start_successor->virt == virt_end) {
+        // We need to create a new block as there are none spare that we would overwrite
+        blocks.insert(block_start_successor, Block(virt, phys, extra_info));
+    } else {
+        // Erase overwritten blocks
+        if (auto eraseStart{std::next(block_start_successor)}; eraseStart != block_end_successor) {
+            blocks.erase(eraseStart, block_end_successor);
+        }
+
+        // Reuse a block that would otherwise be overwritten as a start block
+        block_start_successor->virt = virt;
+        block_start_successor->phys = phys;
+        block_start_successor->extra_info = extra_info;
+    }
+
+    if (unmap_callback) {
+        unmap_callback(virt, size);
+    }
+}
+
+MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
+    VaType virt_end{virt + size};
+
+    if (virt_end > va_limit) {
+        ASSERT_MSG(false,
+                   "Trying to unmap a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}",
+                   virt_end, va_limit);
+    }
+
+    auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)};
+    if (block_end_successor == blocks.begin()) {
+        ASSERT_MSG(false, "Trying to unmap a block before the VA start: virt_end: 0x{:X}",
+                   virt_end);
+    }
+
+    auto block_end_predecessor{std::prev(block_end_successor)};
+
+    auto walk_back_to_predecessor{[&](auto iter) {
+        while (iter->virt >= virt) {
+            iter--;
+        }
+
+        return iter;
+    }};
+
+    auto erase_blocks_with_end_unmapped{[&](auto unmappedEnd) {
+        auto block_start_predecessor{walk_back_to_predecessor(unmappedEnd)};
+        auto block_start_successor{std::next(block_start_predecessor)};
+
+        auto eraseEnd{[&]() {
+            if (block_start_predecessor->Unmapped()) {
+                // If the start predecessor is unmapped then we can erase everything in our region
+                // and be done
+                return std::next(unmappedEnd);
+            } else {
+                // Else reuse the end predecessor as the start of our unmapped region then erase all
+                // up to it
+                unmappedEnd->virt = virt;
+                return unmappedEnd;
+            }
+        }()};
+
+        // We can't have two unmapped regions after each other
+        if (eraseEnd != blocks.end() &&
+            (eraseEnd == block_start_successor ||
+             (block_start_predecessor->Unmapped() && eraseEnd->Unmapped()))) {
+            ASSERT_MSG(false, "Multiple contiguous unmapped regions are unsupported!");
+        }
+
+        blocks.erase(block_start_successor, eraseEnd);
+    }};
+
+    // We can avoid any splitting logic if these are the case
+    if (block_end_predecessor->Unmapped()) {
+        if (block_end_predecessor->virt > virt) {
+            erase_blocks_with_end_unmapped(block_end_predecessor);
+        }
+
+        if (unmap_callback) {
+            unmap_callback(virt, size);
+        }
+
+        return; // The region is unmapped, bail out early
+    } else if (block_end_successor->virt == virt_end && block_end_successor->Unmapped()) {
+        erase_blocks_with_end_unmapped(block_end_successor);
+
+        if (unmap_callback) {
+            unmap_callback(virt, size);
+        }
+
+        return; // The region is unmapped here and doesn't need splitting, bail out early
+    } else if (block_end_successor == blocks.end()) {
+        // This should never happen as the end should always follow an unmapped block
+        ASSERT_MSG(false, "Unexpected Memory Manager state!");
+    } else if (block_end_successor->virt != virt_end) {
+        // If one block is directly in front then we don't have to add a tail
+
+        // The previous block is mapped so we will need to add a tail with an offset
+        PaType tailPhys{[&]() {
+            if constexpr (PaContigSplit) {
+                return block_end_predecessor->phys + virt_end - block_end_predecessor->virt;
+            } else {
+                return block_end_predecessor->phys;
+            }
+        }()};
+
+        if (block_end_predecessor->virt >= virt) {
+            // If this block's start would be overlapped by the unmap then reuse it as a tail block
+            block_end_predecessor->virt = virt_end;
+            block_end_predecessor->phys = tailPhys;
+
+            // No longer predecessor anymore
+            block_end_successor = block_end_predecessor--;
+        } else {
+            blocks.insert(block_end_successor,
+                          {Block(virt, UnmappedPa, {}),
+                           Block(virt_end, tailPhys, block_end_predecessor->extra_info)});
+            if (unmap_callback) {
+                unmap_callback(virt, size);
+            }
+
+            // The previous block is mapped and ends before
+            return;
+        }
+    }
+
+    // Walk the block vector to find the start predecessor as this is more efficient than another
+    // binary search in most scenarios
+    auto block_start_predecessor{walk_back_to_predecessor(block_end_successor)};
+    auto block_start_successor{std::next(block_start_predecessor)};
+
+    if (block_start_successor->virt > virt_end) {
+        ASSERT_MSG(false, "Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt);
+    } else if (block_start_successor->virt == virt_end) {
+        // There are no blocks between the start and the end that would let us skip inserting a new
+        // one for head
+
+        // The previous block may be unmapped, if so we don't need to insert any unmaps after it
+        if (block_start_predecessor->Mapped()) {
+            blocks.insert(block_start_successor, Block(virt, UnmappedPa, {}));
+        }
+    } else if (block_start_predecessor->Unmapped()) {
+        // Previous block is unmapped. NOTE(review): should this erase up to block_end_successor?
+        blocks.erase(block_start_successor, block_end_predecessor);
+    } else {
+        // Erase overwritten blocks, skipping the first one as we have written the unmapped start
+        // block there
+        if (auto eraseStart{std::next(block_start_successor)}; eraseStart != block_end_successor) {
+            blocks.erase(eraseStart, block_end_successor);
+        }
+
+        // Add in the unmapped block header
+        block_start_successor->virt = virt;
+        block_start_successor->phys = UnmappedPa;
+    }
+
+    if (unmap_callback)
+        unmap_callback(virt, size);
+}
+
+ALLOC_MEMBER_CONST()::FlatAllocator(VaType virt_start_, VaType va_limit_)
+    : Base{va_limit_}, virt_start{virt_start_}, current_linear_alloc_end{virt_start_} {}
+
+ALLOC_MEMBER(VaType)::Allocate(VaType size) {
+    std::scoped_lock lock(this->block_mutex);
+
+    VaType alloc_start{UnmappedVa};
+    VaType alloc_end{current_linear_alloc_end + size};
+
+    // Avoid searching backwards in the address space if possible
+    if (alloc_end >= current_linear_alloc_end && alloc_end <= this->va_limit) {
+        auto alloc_end_successor{
+            std::lower_bound(this->blocks.begin(), this->blocks.end(), alloc_end)};
+        if (alloc_end_successor == this->blocks.begin()) {
+            ASSERT_MSG(false, "First block in AS map is invalid!");
+        }
+
+        auto alloc_end_predecessor{std::prev(alloc_end_successor)};
+        if (alloc_end_predecessor->virt <= current_linear_alloc_end) {
+            alloc_start = current_linear_alloc_end;
+        } else {
+            // Skip over any fixed mappings in front of us
+            while (alloc_end_successor != this->blocks.end()) {
+                if (alloc_end_successor->virt - alloc_end_predecessor->virt < size ||
+                    alloc_end_predecessor->Mapped()) {
+                    alloc_start = alloc_end_predecessor->virt;
+                    break;
+                }
+                // NOTE(review): slow path skips on the predicate the if above accepts on - confirm
+                alloc_end_predecessor = alloc_end_successor++;
+
+                // Use the VA limit to calculate if we can fit in the final block since it has no
+                // successor
+                if (alloc_end_successor == this->blocks.end()) {
+                    alloc_end = alloc_end_predecessor->virt + size;
+
+                    if (alloc_end >= alloc_end_predecessor->virt && alloc_end <= this->va_limit) {
+                        alloc_start = alloc_end_predecessor->virt;
+                    }
+                }
+            }
+        }
+    }
+
+    if (alloc_start != UnmappedVa) {
+        current_linear_alloc_end = alloc_start + size;
+    } else { // If linear allocation overflows the AS then find a gap
+        if (this->blocks.size() <= 2) {
+            ASSERT_MSG(false, "Unexpected allocator state!");
+        }
+
+        auto search_predecessor{this->blocks.begin()};
+        auto search_successor{std::next(search_predecessor)};
+
+        while (search_successor != this->blocks.end() &&
+               (search_successor->virt - search_predecessor->virt < size ||
+                search_predecessor->Mapped())) {
+            search_predecessor = search_successor++;
+        }
+
+        if (search_successor != this->blocks.end()) {
+            alloc_start = search_predecessor->virt;
+        } else {
+            return {}; // AS is full
+        }
+    }
+
+    this->MapLocked(alloc_start, true, size, {});
+    return alloc_start;
+}
+
+ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
+    this->Map(virt, true, size);
+}
+
+ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
+    this->Unmap(virt, size);
+}
+} // namespace Common
--- a/src/common/algorithm.h
+++ b/src/common/algorithm.h
@@ -24,4 +24,12 @@ template <class ForwardIt, class T, class Compare = std::less<>>
    return first != last && !comp(value, *first) ? first : last;
 }

+template <typename T, typename Func, typename... Args>
+T FoldRight(T initial_value, Func&& func, Args&&... args) {
+    T accumulator{initial_value}; // running result, fed back into func once per argument
+    const auto step = [&](auto item) { accumulator = func(accumulator, item); };
+    (step(std::forward<Args>(args)), ...); // arguments are consumed in the order given
+    return accumulator;
+}
+
 } // namespace Common
--- a/src/common/hash.h
+++ b/src/common/hash.h
@@ -18,4 +18,11 @@ struct PairHash {
    }
 };

+template <typename T>
+struct IdentityHash {
+    [[nodiscard]] size_t operator()(T value) const noexcept {
+        return static_cast<size_t>(value);
+    }
+};
+
 } // namespace Common
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -219,7 +219,7 @@ private:

    void StartBackendThread() {
        backend_thread = std::jthread([this](std::stop_token stop_token) {
-            Common::SetCurrentThreadName("yuzu:Log");
+            Common::SetCurrentThreadName("Logger");
            Entry entry;
            const auto write_logs = [this, &entry]() {
                ForEachBackend([&entry](Backend& backend) { backend.Write(entry); });
--- a/src/common/multi_level_page_table.cpp
+++ b/src/common/multi_level_page_table.cpp
@@ -0,0 +1,9 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/multi_level_page_table.inc"
+
+namespace Common {
+template class Common::MultiLevelPageTable<u64>;
+template class Common::MultiLevelPageTable<u32>;
+} // namespace Common
--- a/src/common/multi_level_page_table.h
+++ b/src/common/multi_level_page_table.h
@@ -0,0 +1,78 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Common {
+
+template <typename BaseAddr>
+class MultiLevelPageTable final {
+public:
+    constexpr MultiLevelPageTable() = default;
+    explicit MultiLevelPageTable(std::size_t address_space_bits, std::size_t first_level_bits,
+                                 std::size_t page_bits);
+
+    ~MultiLevelPageTable() noexcept;
+
+    MultiLevelPageTable(const MultiLevelPageTable&) = delete;
+    MultiLevelPageTable& operator=(const MultiLevelPageTable&) = delete;
+
+    /// Takes over every member of other (including alloc_size, which the destructor needs) and
+    /// leaves other in the default-constructed state.
+    MultiLevelPageTable(MultiLevelPageTable&& other) noexcept {
+        *this = std::move(other);
+    }
+
+    /// Exchanges state with other instead of overwriting it, so whatever this table held before
+    /// the assignment is released by other's destructor rather than being dropped.
+    MultiLevelPageTable& operator=(MultiLevelPageTable&& other) noexcept {
+        std::swap(address_space_bits, other.address_space_bits);
+        std::swap(first_level_bits, other.first_level_bits);
+        std::swap(page_bits, other.page_bits);
+        std::swap(first_level_shift, other.first_level_shift);
+        std::swap(first_level_chunk_size, other.first_level_chunk_size);
+        std::swap(alloc_size, other.alloc_size);
+        std::swap(first_level_map, other.first_level_map);
+        std::swap(base_ptr, other.base_ptr);
+        return *this;
+    }
+
+    void ReserveRange(u64 start, std::size_t size);
+
+    /// Unchecked element access relative to base_ptr.
+    [[nodiscard]] const BaseAddr& operator[](std::size_t index) const {
+        return base_ptr[index];
+    }
+
+    [[nodiscard]] BaseAddr& operator[](std::size_t index) {
+        return base_ptr[index];
+    }
+
+    [[nodiscard]] BaseAddr* data() {
+        return base_ptr;
+    }
+
+    [[nodiscard]] const BaseAddr* data() const {
+        return base_ptr;
+    }
+
+private:
+    void AllocateLevel(u64 level);
+
+    std::size_t address_space_bits{};
+    std::size_t first_level_bits{};
+    std::size_t page_bits{};
+    std::size_t first_level_shift{};
+    std::size_t first_level_chunk_size{};
+    std::size_t alloc_size{};
+    std::vector<void*> first_level_map{};
+    BaseAddr* base_ptr{};
+};
+
+} // namespace Common
--- a/src/common/multi_level_page_table.inc
+++ b/src/common/multi_level_page_table.inc
@@ -0,0 +1,84 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/mman.h>
+#endif
+
+#include "common/assert.h"
+#include "common/multi_level_page_table.h"
+
+namespace Common {
+
+template <typename BaseAddr>
+MultiLevelPageTable<BaseAddr>::MultiLevelPageTable(std::size_t address_space_bits_,
+                                                   std::size_t first_level_bits_,
+                                                   std::size_t page_bits_)
+    : address_space_bits{address_space_bits_},
+      first_level_bits{first_level_bits_}, page_bits{page_bits_} {
+    if (page_bits == 0) {
+      return;
+    }
+    first_level_shift = address_space_bits - first_level_bits;
+    first_level_chunk_size = (1ULL << (first_level_shift - page_bits)) * sizeof(BaseAddr);
+    alloc_size = (1ULL << (address_space_bits - page_bits)) * sizeof(BaseAddr);
+    std::size_t first_level_size = 1ULL << first_level_bits;
+    first_level_map.resize(first_level_size, nullptr);
+#ifdef _WIN32
+    void* base{VirtualAlloc(nullptr, alloc_size, MEM_RESERVE, PAGE_READWRITE)};
+#else
+    void* base{mmap(nullptr, alloc_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)};
+    // mmap signals failure with MAP_FAILED, not nullptr; fold it into the nullptr check below
+    if (base == MAP_FAILED) {
+        base = nullptr;
+    }
+#endif
+    // alloc_size covers one BaseAddr entry per page of the whole address space
+    ASSERT(base);
+    base_ptr = reinterpret_cast<BaseAddr*>(base);
+}
+
+template <typename BaseAddr>
+MultiLevelPageTable<BaseAddr>::~MultiLevelPageTable() noexcept {
+    if (base_ptr) {
+#ifdef _WIN32
+        [[maybe_unused]] const bool released{VirtualFree(base_ptr, 0, MEM_RELEASE) != 0};
+#else
+        [[maybe_unused]] const bool released{munmap(base_ptr, alloc_size) == 0};
+#endif
+        ASSERT(released); // checked separately so the release never hinges on the macro
+    }
+}
+
+template <typename BaseAddr>
+void MultiLevelPageTable<BaseAddr>::ReserveRange(u64 start, std::size_t size) {
+    // (start + size) is one past the range, so bound the walk by the table size as well.
+    const u64 new_end = (start + size) >> first_level_shift;
+    for (u64 i = start >> first_level_shift; i <= new_end && i < first_level_map.size(); i++) {
+        if (!first_level_map[i]) {
+            AllocateLevel(i);
+        }
+    }
+}
+
+template <typename BaseAddr>
+void MultiLevelPageTable<BaseAddr>::AllocateLevel(u64 level) {
+    void* ptr = reinterpret_cast<char *>(base_ptr) + level * first_level_chunk_size;
+#ifdef _WIN32
+    void* base{VirtualAlloc(ptr, first_level_chunk_size, MEM_COMMIT, PAGE_READWRITE)};
+#else
+    void* base{mmap(ptr, first_level_chunk_size, PROT_READ | PROT_WRITE,
+                    MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)};
+    // NOTE(review): ptr is only a hint without MAP_FIXED, so base may differ from ptr - confirm
+    if (base == MAP_FAILED) {
+        base = nullptr;
+    }
+#endif
+    ASSERT(base);
+
+    first_level_map[level] = base;
+}
+
+} // namespace Common
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -138,8 +138,6 @@ add_library(core STATIC
    frontend/emu_window.h
    frontend/framebuffer_layout.cpp
    frontend/framebuffer_layout.h
-    hardware_interrupt_manager.cpp
-    hardware_interrupt_manager.h
    hid/emulated_console.cpp
    hid/emulated_console.h
    hid/emulated_controller.cpp
@@ -550,6 +548,12 @@ add_library(core STATIC
    hle/service/ns/ns.h
    hle/service/ns/pdm_qry.cpp
    hle/service/ns/pdm_qry.h
+    hle/service/nvdrv/core/container.cpp
+    hle/service/nvdrv/core/container.h
+    hle/service/nvdrv/core/nvmap.cpp
+    hle/service/nvdrv/core/nvmap.h
+    hle/service/nvdrv/core/syncpoint_manager.cpp
+    hle/service/nvdrv/core/syncpoint_manager.h
    hle/service/nvdrv/devices/nvdevice.h
    hle/service/nvdrv/devices/nvdisp_disp0.cpp
    hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -578,8 +582,6 @@ add_library(core STATIC
    hle/service/nvdrv/nvdrv_interface.h
    hle/service/nvdrv/nvmemp.cpp
    hle/service/nvdrv/nvmemp.h
-    hle/service/nvdrv/syncpoint_manager.cpp
-    hle/service/nvdrv/syncpoint_manager.h
    hle/service/nvflinger/binder.h
    hle/service/nvflinger/buffer_item.h
    hle/service/nvflinger/buffer_item_consumer.cpp
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -27,7 +27,6 @@
 #include "core/file_sys/savedata_factory.h"
 #include "core/file_sys/vfs_concat.h"
 #include "core/file_sys/vfs_real.h"
-#include "core/hardware_interrupt_manager.h"
 #include "core/hid/hid_core.h"
 #include "core/hle/kernel/k_memory_manager.h"
 #include "core/hle/kernel/k_process.h"
@@ -51,6 +50,7 @@
 #include "core/telemetry_session.h"
 #include "core/tools/freezer.h"
 #include "network/network.h"
+#include "video_core/host1x/host1x.h"
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"

@@ -215,6 +215,7 @@ struct System::Impl {

        telemetry_session = std::make_unique<Core::TelemetrySession>();

+        host1x_core = std::make_unique<Tegra::Host1x::Host1x>(system);
        gpu_core = VideoCore::CreateGPU(emu_window, system);
        if (!gpu_core) {
            return SystemResultStatus::ErrorVideoCore;
@@ -224,7 +225,6 @@ struct System::Impl {

        service_manager = std::make_shared<Service::SM::ServiceManager>(kernel);
        services = std::make_unique<Service::Services>(service_manager, system);
-        interrupt_manager = std::make_unique<Hardware::InterruptManager>(system);

        // Initialize time manager, which must happen after kernel is created
        time_manager.Initialize();
@@ -373,6 +373,7 @@ struct System::Impl {
        app_loader.reset();
        audio_core.reset();
        gpu_core.reset();
+        host1x_core.reset();
        perf_stats.reset();
        kernel.Shutdown();
        memory.Reset();
@@ -450,7 +451,7 @@ struct System::Impl {
    /// AppLoader used to load the current executing application
    std::unique_ptr<Loader::AppLoader> app_loader;
    std::unique_ptr<Tegra::GPU> gpu_core;
-    std::unique_ptr<Hardware::InterruptManager> interrupt_manager;
+    std::unique_ptr<Tegra::Host1x::Host1x> host1x_core;
    std::unique_ptr<Core::DeviceMemory> device_memory;
    std::unique_ptr<AudioCore::AudioCore> audio_core;
    Core::Memory::Memory memory;
@@ -668,12 +669,12 @@ const Tegra::GPU& System::GPU() const {
    return *impl->gpu_core;
 }

-Core::Hardware::InterruptManager& System::InterruptManager() {
-    return *impl->interrupt_manager;
+Tegra::Host1x::Host1x& System::Host1x() {
+    return *impl->host1x_core;
 }

-const Core::Hardware::InterruptManager& System::InterruptManager() const {
-    return *impl->interrupt_manager;
+const Tegra::Host1x::Host1x& System::Host1x() const {
+    return *impl->host1x_core;
 }

 VideoCore::RendererBase& System::Renderer() {
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -74,6 +74,9 @@ class TimeManager;
 namespace Tegra {
 class DebugContext;
 class GPU;
+namespace Host1x {
+class Host1x;
+} // namespace Host1x
 } // namespace Tegra

 namespace VideoCore {
@@ -88,10 +91,6 @@ namespace Core::Timing {
 class CoreTiming;
 }

-namespace Core::Hardware {
-class InterruptManager;
-}
-
 namespace Core::HID {
 class HIDCore;
 }
@@ -260,6 +259,12 @@ public:
    /// Gets an immutable reference to the GPU interface.
    [[nodiscard]] const Tegra::GPU& GPU() const;

+    /// Gets a mutable reference to the Host1x interface
+    [[nodiscard]] Tegra::Host1x::Host1x& Host1x();
+
+    /// Gets an immutable reference to the Host1x interface.
+    [[nodiscard]] const Tegra::Host1x::Host1x& Host1x() const;
+
    /// Gets a mutable reference to the renderer.
    [[nodiscard]] VideoCore::RendererBase& Renderer();

@@ -296,12 +301,6 @@ public:
    /// Provides a constant reference to the core timing instance.
    [[nodiscard]] const Timing::CoreTiming& CoreTiming() const;

-    /// Provides a reference to the interrupt manager instance.
-    [[nodiscard]] Core::Hardware::InterruptManager& InterruptManager();
-
-    /// Provides a constant reference to the interrupt manager instance.
-    [[nodiscard]] const Core::Hardware::InterruptManager& InterruptManager() const;
-
    /// Provides a reference to the kernel instance.
    [[nodiscard]] Kernel::KernelCore& Kernel();

--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -43,7 +43,7 @@ CoreTiming::CoreTiming()
 CoreTiming::~CoreTiming() = default;

 void CoreTiming::ThreadEntry(CoreTiming& instance) {
-    constexpr char name[] = "yuzu:HostTiming";
+    constexpr char name[] = "HostTiming";
    MicroProfileOnThreadCreate(name);
    Common::SetCurrentThreadName(name);
    Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -189,9 +189,9 @@ void CpuManager::RunThread(std::size_t core) {
    system.RegisterCoreThread(core);
    std::string name;
    if (is_multicore) {
-        name = "yuzu:CPUCore_" + std::to_string(core);
+        name = "CPUCore_" + std::to_string(core);
    } else {
-        name = "yuzu:CPUThread";
+        name = "CPUThread";
    }
    MicroProfileOnThreadCreate(name.c_str());
    Common::SetCurrentThreadName(name.c_str());
--- a/src/core/debugger/debugger.cpp
+++ b/src/core/debugger/debugger.cpp
@@ -140,7 +140,7 @@ private:
    }

    void ThreadLoop(std::stop_token stop_token) {
-        Common::SetCurrentThreadName("yuzu:Debugger");
+        Common::SetCurrentThreadName("Debugger");

        // Set up the client signals for new data.
        AsyncReceiveInto(signal_pipe, pipe_data, [&](auto d) { PipeData(d); });
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -33,11 +33,55 @@ Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) {
        return Loader::ResultStatus::ErrorBadACIHeader;
    }

-    if (sizeof(FileAccessControl) != file->ReadObject(&acid_file_access, acid_header.fac_offset)) {
+    // Load acid_file_access per-component instead of the entire struct, since this struct does not
+    // reflect the layout of the real data.
+    std::size_t current_offset = acid_header.fac_offset;
+    if (sizeof(FileAccessControl::version) != file->ReadBytes(&acid_file_access.version,
+                                                              sizeof(FileAccessControl::version),
+                                                              current_offset)) {
+        return Loader::ResultStatus::ErrorBadFileAccessControl;
+    }
+    if (sizeof(FileAccessControl::permissions) !=
+        file->ReadBytes(&acid_file_access.permissions, sizeof(FileAccessControl::permissions),
+                        current_offset += sizeof(FileAccessControl::version) + 3)) {
+        return Loader::ResultStatus::ErrorBadFileAccessControl;
+    }
+    if (sizeof(FileAccessControl::unknown) !=
+        file->ReadBytes(&acid_file_access.unknown, sizeof(FileAccessControl::unknown),
+                        current_offset + sizeof(FileAccessControl::permissions))) {
        return Loader::ResultStatus::ErrorBadFileAccessControl;
    }

-    if (sizeof(FileAccessHeader) != file->ReadObject(&aci_file_access, aci_header.fah_offset)) {
+    // Load aci_file_access per-component instead of the entire struct, same as acid_file_access
+    current_offset = aci_header.fah_offset;
+    if (sizeof(FileAccessHeader::version) != file->ReadBytes(&aci_file_access.version,
+                                                             sizeof(FileAccessHeader::version),
+                                                             current_offset)) {
+        return Loader::ResultStatus::ErrorBadFileAccessHeader;
+    }
+    if (sizeof(FileAccessHeader::permissions) !=
+        file->ReadBytes(&aci_file_access.permissions, sizeof(FileAccessHeader::permissions),
+                        current_offset += sizeof(FileAccessHeader::version) + 3)) {
+        return Loader::ResultStatus::ErrorBadFileAccessHeader;
+    }
+    if (sizeof(FileAccessHeader::unk_offset) !=
+        file->ReadBytes(&aci_file_access.unk_offset, sizeof(FileAccessHeader::unk_offset),
+                        current_offset += sizeof(FileAccessHeader::permissions))) {
+        return Loader::ResultStatus::ErrorBadFileAccessHeader;
+    }
+    if (sizeof(FileAccessHeader::unk_size) !=
+        file->ReadBytes(&aci_file_access.unk_size, sizeof(FileAccessHeader::unk_size),
+                        current_offset += sizeof(FileAccessHeader::unk_offset))) {
+        return Loader::ResultStatus::ErrorBadFileAccessHeader;
+    }
+    if (sizeof(FileAccessHeader::unk_offset_2) !=
+        file->ReadBytes(&aci_file_access.unk_offset_2, sizeof(FileAccessHeader::unk_offset_2),
+                        current_offset += sizeof(FileAccessHeader::unk_size))) {
+        return Loader::ResultStatus::ErrorBadFileAccessHeader;
+    }
+    if (sizeof(FileAccessHeader::unk_size_2) !=
+        file->ReadBytes(&aci_file_access.unk_size_2, sizeof(FileAccessHeader::unk_size_2),
+                        current_offset + sizeof(FileAccessHeader::unk_offset_2))) {
        return Loader::ResultStatus::ErrorBadFileAccessHeader;
    }

@@ -152,9 +196,7 @@ void ProgramMetadata::Print() const {
    LOG_DEBUG(Service_FS, " > Is Retail:           {}", acid_header.is_retail ? "YES" : "NO");
    LOG_DEBUG(Service_FS, "Title ID Min:           0x{:016X}", acid_header.title_id_min);
    LOG_DEBUG(Service_FS, "Title ID Max:           0x{:016X}", acid_header.title_id_max);
-    u64_le permissions_l; // local copy to fix alignment error
-    std::memcpy(&permissions_l, &acid_file_access.permissions, sizeof(permissions_l));
-    LOG_DEBUG(Service_FS, "Filesystem Access:      0x{:016X}\n", permissions_l);
+    LOG_DEBUG(Service_FS, "Filesystem Access:      0x{:016X}\n", acid_file_access.permissions);

    // Begin ACI0 printing (actual perms, unsigned)
    LOG_DEBUG(Service_FS, "Magic:                  {:.4}", aci_header.magic.data());
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -144,20 +144,18 @@ private:

    static_assert(sizeof(AciHeader) == 0x40, "ACI0 header structure size is wrong");

-#pragma pack(push, 1)
-
+    // FileAccessControl and FileAccessHeader need to be loaded per-component: this layout does
+    // not reflect the real layout, to avoid reference binding to misaligned addresses
    struct FileAccessControl {
        u8 version;
-        INSERT_PADDING_BYTES(3);
+        // 3 padding bytes
        u64_le permissions;
        std::array<u8, 0x20> unknown;
    };

-    static_assert(sizeof(FileAccessControl) == 0x2C, "FS access control structure size is wrong");
-
    struct FileAccessHeader {
        u8 version;
-        INSERT_PADDING_BYTES(3);
+        // 3 padding bytes
        u64_le permissions;
        u32_le unk_offset;
        u32_le unk_size;
@@ -165,10 +163,6 @@ private:
        u32_le unk_size_2;
    };

-    static_assert(sizeof(FileAccessHeader) == 0x1C, "FS access header structure size is wrong");
-
-#pragma pack(pop)
-
    Header npdm_header;
    AciHeader aci_header;
    AcidHeader acid_header;
--- a/src/core/hardware_interrupt_manager.cpp
+++ b/src/core/hardware_interrupt_manager.cpp
@@ -1,32 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include "core/core.h"
-#include "core/core_timing.h"
-#include "core/hardware_interrupt_manager.h"
-#include "core/hle/service/nvdrv/nvdrv_interface.h"
-#include "core/hle/service/sm/sm.h"
-
-namespace Core::Hardware {
-
-InterruptManager::InterruptManager(Core::System& system_in) : system(system_in) {
-    gpu_interrupt_event = Core::Timing::CreateEvent(
-        "GPUInterrupt",
-        [this](std::uintptr_t message, u64 time,
-               std::chrono::nanoseconds) -> std::optional<std::chrono::nanoseconds> {
-            auto nvdrv = system.ServiceManager().GetService<Service::Nvidia::NVDRV>("nvdrv");
-            const u32 syncpt = static_cast<u32>(message >> 32);
-            const u32 value = static_cast<u32>(message);
-            nvdrv->SignalGPUInterruptSyncpt(syncpt, value);
-            return std::nullopt;
-        });
-}
-
-InterruptManager::~InterruptManager() = default;
-
-void InterruptManager::GPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
-    const u64 msg = (static_cast<u64>(syncpoint_id) << 32ULL) | value;
-    system.CoreTiming().ScheduleEvent(std::chrono::nanoseconds{10}, gpu_interrupt_event, msg);
-}
-
-} // namespace Core::Hardware
--- a/src/core/hardware_interrupt_manager.h
+++ b/src/core/hardware_interrupt_manager.h
@@ -1,32 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include <memory>
-
-#include "common/common_types.h"
-
-namespace Core {
-class System;
-}
-
-namespace Core::Timing {
-struct EventType;
-}
-
-namespace Core::Hardware {
-
-class InterruptManager {
-public:
-    explicit InterruptManager(Core::System& system);
-    ~InterruptManager();
-
-    void GPUInterruptSyncpt(u32 syncpoint_id, u32 value);
-
-private:
-    Core::System& system;
-    std::shared_ptr<Core::Timing::EventType> gpu_interrupt_event;
-};
-
-} // namespace Core::Hardware
--- a/src/core/hid/emulated_controller.cpp
+++ b/src/core/hid/emulated_controller.cpp
@@ -1017,9 +1017,11 @@ bool EmulatedController::SetPollingMode(Common::Input::PollingMode polling_mode)
    auto& output_device = output_devices[static_cast<std::size_t>(DeviceIndex::Right)];
    auto& nfc_output_device = output_devices[3];

-    nfc_output_device->SetPollingMode(polling_mode);
+    const auto virtual_nfc_result = nfc_output_device->SetPollingMode(polling_mode);
+    const auto mapped_nfc_result = output_device->SetPollingMode(polling_mode);

-    return output_device->SetPollingMode(polling_mode) == Common::Input::PollingError::None;
+    return virtual_nfc_result == Common::Input::PollingError::None ||
+           mapped_nfc_result == Common::Input::PollingError::None;
 }

 bool EmulatedController::SetCameraFormat(
--- a/src/core/hle/kernel/k_worker_task_manager.cpp
+++ b/src/core/hle/kernel/k_worker_task_manager.cpp
@@ -23,7 +23,7 @@ void KWorkerTask::DoWorkerTask() {
    }
 }

-KWorkerTaskManager::KWorkerTaskManager() : m_waiting_thread(1, "yuzu:KWorkerTaskManager") {}
+KWorkerTaskManager::KWorkerTaskManager() : m_waiting_thread(1, "KWorkerTaskManager") {}

 void KWorkerTaskManager::AddTask(KernelCore& kernel, WorkerType type, KWorkerTask* task) {
    ASSERT(type <= WorkerType::Count);
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -48,7 +48,7 @@ namespace Kernel {
 struct KernelCore::Impl {
    explicit Impl(Core::System& system_, KernelCore& kernel_)
        : time_manager{system_},
-          service_threads_manager{1, "yuzu:ServiceThreadsManager"}, system{system_} {}
+          service_threads_manager{1, "ServiceThreadsManager"}, system{system_} {}

    void SetMulticore(bool is_multi) {
        is_multicore = is_multi;
--- a/src/core/hle/kernel/service_thread.cpp
+++ b/src/core/hle/kernel/service_thread.cpp
@@ -36,7 +36,7 @@ ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std
    : service_name{name} {
    for (std::size_t i = 0; i < num_threads; ++i) {
        threads.emplace_back([this, &kernel](std::stop_token stop_token) {
-            Common::SetCurrentThreadName(std::string{"yuzu:HleService:" + service_name}.c_str());
+            Common::SetCurrentThreadName(std::string{service_name}.c_str());

            // Wait for first request before trying to acquire a render context
            {
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -707,7 +707,7 @@ FSP_SRV::FSP_SRV(Core::System& system_)
        {31, nullptr, "OpenGameCardFileSystem"},
        {32, nullptr, "ExtendSaveDataFileSystem"},
        {33, nullptr, "DeleteCacheStorage"},
-        {34, nullptr, "GetCacheStorageSize"},
+        {34, &FSP_SRV::GetCacheStorageSize, "GetCacheStorageSize"},
        {35, nullptr, "CreateSaveDataFileSystemByHashSalt"},
        {36, nullptr, "OpenHostFileSystemWithOption"},
        {51, &FSP_SRV::OpenSaveDataFileSystem, "OpenSaveDataFileSystem"},
@@ -1107,6 +1107,18 @@ void FSP_SRV::GetProgramIndexForAccessLog(Kernel::HLERequestContext& ctx) {
    rb.Push(access_log_program_index);
 }

+// Stub: reads the requested s32 index, logs it, and replies with ResultSuccess followed by two
+// zero-valued s64 fields.
+void FSP_SRV::GetCacheStorageSize(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser parser{ctx};
+    const auto storage_index = parser.Pop<s32>();
+
+    LOG_WARNING(Service_FS, "(STUBBED) called with index={}", storage_index);
+
+    // No real sizes are computed yet; both reported values are zero.
+    IPC::ResponseBuilder response{ctx, 6};
+    response.Push(ResultSuccess);
+    response.Push(s64{0});
+    response.Push(s64{0});
+}
+
 class IMultiCommitManager final : public ServiceFramework<IMultiCommitManager> {
 public:
    explicit IMultiCommitManager(Core::System& system_)
--- a/src/core/hle/service/filesystem/fsp_srv.h
+++ b/src/core/hle/service/filesystem/fsp_srv.h
@@ -54,6 +54,7 @@ private:
    void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx);
    void GetProgramIndexForAccessLog(Kernel::HLERequestContext& ctx);
    void OpenMultiCommitManager(Kernel::HLERequestContext& ctx);
+    void GetCacheStorageSize(Kernel::HLERequestContext& ctx);

    FileSystemController& fsc;
    const FileSys::ContentProvider& content_provider;
--- a/src/core/hle/service/nfp/amiibo_crypto.cpp
+++ b/src/core/hle/service/nfp/amiibo_crypto.cpp
@@ -28,7 +28,7 @@ bool IsAmiiboValid(const EncryptedNTAG215File& ntag_file) {
    LOG_DEBUG(Service_NFP, "model_number=0x{0:x}",
              static_cast<u16>(amiibo_data.model_info.model_number));
    LOG_DEBUG(Service_NFP, "series={}", amiibo_data.model_info.series);
-    LOG_DEBUG(Service_NFP, "fixed_value=0x{0:x}", amiibo_data.model_info.constant_value);
+    LOG_DEBUG(Service_NFP, "tag_type=0x{0:x}", amiibo_data.model_info.tag_type);

    LOG_DEBUG(Service_NFP, "tag_dynamic_lock=0x{0:x}", ntag_file.dynamic_lock);
    LOG_DEBUG(Service_NFP, "tag_CFG0=0x{0:x}", ntag_file.CFG0);
@@ -55,7 +55,7 @@ bool IsAmiiboValid(const EncryptedNTAG215File& ntag_file) {
    if (amiibo_data.constant_value != 0xA5) {
        return false;
    }
-    if (amiibo_data.model_info.constant_value != 0x02) {
+    if (amiibo_data.model_info.tag_type != PackedTagType::Type2) {
        return false;
    }
    if ((ntag_file.dynamic_lock & 0xFFFFFF) != 0x0F0001U) {
--- a/src/core/hle/service/nfp/nfp_device.cpp
+++ b/src/core/hle/service/nfp/nfp_device.cpp
@@ -98,11 +98,6 @@ bool NfpDevice::LoadAmiibo(std::span<const u8> data) {

    memcpy(&encrypted_tag_data, data.data(), sizeof(EncryptedNTAG215File));

-    if (!AmiiboCrypto::IsAmiiboValid(encrypted_tag_data)) {
-        LOG_INFO(Service_NFP, "Invalid amiibo");
-        return false;
-    }
-
    device_state = DeviceState::TagFound;
    deactivate_event->GetReadableEvent().Clear();
    activate_event->GetWritableEvent().Signal();
@@ -148,20 +143,28 @@ void NfpDevice::Finalize() {
 }

 Result NfpDevice::StartDetection(s32 protocol_) {
-    if (device_state == DeviceState::Initialized || device_state == DeviceState::TagRemoved) {
-        npad_device->SetPollingMode(Common::Input::PollingMode::NFC);
-        device_state = DeviceState::SearchingForTag;
-        protocol = protocol_;
-        return ResultSuccess;
+    if (device_state != DeviceState::Initialized && device_state != DeviceState::TagRemoved) {
+        LOG_ERROR(Service_NFP, "Wrong device state {}", device_state);
+        return WrongDeviceState;
    }

-    LOG_ERROR(Service_NFP, "Wrong device state {}", device_state);
-    return WrongDeviceState;
+    if (!npad_device->SetPollingMode(Common::Input::PollingMode::NFC)) {
+        LOG_ERROR(Service_NFP, "Nfc not supported");
+        return NfcDisabled;
+    }
+
+    device_state = DeviceState::SearchingForTag;
+    protocol = protocol_;
+    return ResultSuccess;
 }

 Result NfpDevice::StopDetection() {
    npad_device->SetPollingMode(Common::Input::PollingMode::Active);

+    if (device_state == DeviceState::Initialized) {
+        return ResultSuccess;
+    }
+
    if (device_state == DeviceState::TagFound || device_state == DeviceState::TagMounted) {
        CloseAmiibo();
        return ResultSuccess;
@@ -225,6 +228,11 @@ Result NfpDevice::Mount(MountTarget mount_target_) {
        return WrongDeviceState;
    }

+    if (!AmiiboCrypto::IsAmiiboValid(encrypted_tag_data)) {
+        LOG_ERROR(Service_NFP, "Not an amiibo");
+        return NotAnAmiibo;
+    }
+
    if (!AmiiboCrypto::DecodeAmiibo(encrypted_tag_data, tag_data)) {
        LOG_ERROR(Service_NFP, "Can't decode amiibo {}", device_state);
        return CorruptedData;
@@ -238,6 +246,9 @@ Result NfpDevice::Mount(MountTarget mount_target_) {
 Result NfpDevice::Unmount() {
    if (device_state != DeviceState::TagMounted) {
        LOG_ERROR(Service_NFP, "Wrong device state {}", device_state);
+        if (device_state == DeviceState::TagRemoved) {
+            return TagRemoved;
+        }
        return WrongDeviceState;
    }

@@ -256,6 +267,9 @@ Result NfpDevice::Unmount() {
 Result NfpDevice::GetTagInfo(TagInfo& tag_info) const {
    if (device_state != DeviceState::TagFound && device_state != DeviceState::TagMounted) {
        LOG_ERROR(Service_NFP, "Wrong device state {}", device_state);
+        if (device_state == DeviceState::TagRemoved) {
+            return TagRemoved;
+        }
        return WrongDeviceState;
    }

@@ -287,12 +301,7 @@ Result NfpDevice::GetCommonInfo(CommonInfo& common_info) const {

    // TODO: Validate this data
    common_info = {
-        .last_write_date =
-            {
-                settings.write_date.GetYear(),
-                settings.write_date.GetMonth(),
-                settings.write_date.GetDay(),
-            },
+        .last_write_date = settings.write_date.GetWriteDate(),
        .write_counter = tag_data.write_counter,
        .version = 0,
        .application_area_size = sizeof(ApplicationArea),
@@ -303,6 +312,9 @@ Result NfpDevice::GetCommonInfo(CommonInfo& common_info) const {
 Result NfpDevice::GetModelInfo(ModelInfo& model_info) const {
    if (device_state != DeviceState::TagMounted) {
        LOG_ERROR(Service_NFP, "Wrong device state {}", device_state);
+        if (device_state == DeviceState::TagRemoved) {
+            return TagRemoved;
+        }
        return WrongDeviceState;
    }

@@ -341,12 +353,7 @@ Result NfpDevice::GetRegisterInfo(RegisterInfo& register_info) const {
    // TODO: Validate this data
    register_info = {
        .mii_char_info = manager.ConvertV3ToCharInfo(tag_data.owner_mii),
-        .creation_date =
-            {
-                settings.init_date.GetYear(),
-                settings.init_date.GetMonth(),
-                settings.init_date.GetDay(),
-            },
+        .creation_date = settings.init_date.GetWriteDate(),
        .amiibo_name = GetAmiiboName(settings),
        .font_region = {},
    };
@@ -478,8 +485,7 @@ Result NfpDevice::GetApplicationArea(std::vector<u8>& data) const {
    }

    if (data.size() > sizeof(ApplicationArea)) {
-        LOG_ERROR(Service_NFP, "Wrong data size {}", data.size());
-        return ResultUnknown;
+        data.resize(sizeof(ApplicationArea));
    }

    memcpy(data.data(), tag_data.application_area.data(), data.size());
@@ -518,7 +524,7 @@ Result NfpDevice::SetApplicationArea(std::span<const u8> data) {

    Common::TinyMT rng{};
    std::memcpy(tag_data.application_area.data(), data.data(), data.size());
-    // HW seems to fill excess data with garbage
+    // Fill remaining data with random numbers
    rng.GenerateRandomBytes(tag_data.application_area.data() + data.size(),
                            sizeof(ApplicationArea) - data.size());

@@ -561,12 +567,12 @@ Result NfpDevice::RecreateApplicationArea(u32 access_id, std::span<const u8> dat

    if (data.size() > sizeof(ApplicationArea)) {
        LOG_ERROR(Service_NFP, "Wrong data size {}", data.size());
-        return ResultUnknown;
+        return WrongApplicationAreaSize;
    }

    Common::TinyMT rng{};
    std::memcpy(tag_data.application_area.data(), data.data(), data.size());
-    // HW seems to fill excess data with garbage
+    // Fill remaining data with random numbers
    rng.GenerateRandomBytes(tag_data.application_area.data() + data.size(),
                            sizeof(ApplicationArea) - data.size());

@@ -612,7 +618,6 @@ u64 NfpDevice::GetHandle() const {
 }

 u32 NfpDevice::GetApplicationAreaSize() const {
-    // Investigate if this value is really constant
    return sizeof(ApplicationArea);
 }

--- a/src/core/hle/service/nfp/nfp_result.h
+++ b/src/core/hle/service/nfp/nfp_result.h
@@ -8,6 +8,8 @@
 namespace Service::NFP {

 constexpr Result DeviceNotFound(ErrorModule::NFP, 64);
+constexpr Result InvalidArgument(ErrorModule::NFP, 65);
+constexpr Result WrongApplicationAreaSize(ErrorModule::NFP, 68);
 constexpr Result WrongDeviceState(ErrorModule::NFP, 73);
 constexpr Result NfcDisabled(ErrorModule::NFP, 80);
 constexpr Result WriteAmiiboFailed(ErrorModule::NFP, 88);
--- a/src/core/hle/service/nfp/nfp_types.h
+++ b/src/core/hle/service/nfp/nfp_types.h
@@ -84,6 +84,15 @@ enum class TagType : u32 {
    Type5, // ISO15693 RW/RO 540 bytes 106kbit/s
 };

+// Tag type stored in a single byte. Enumerators are numbered consecutively starting at zero.
+enum class PackedTagType : u8 {
+    None = 0,
+    Type1 = 1, // ISO14443A RW 96-2k bytes 106kbit/s
+    Type2 = 2, // ISO14443A RW/RO 540 bytes 106kbit/s
+    Type3 = 3, // Sony Felica RW/RO 2k bytes 212kbit/s
+    Type4 = 4, // ISO14443A RW/RO 4k-32k bytes 424kbit/s
+    Type5 = 5, // ISO15693 RW/RO 540 bytes 106kbit/s
+};
+
 enum class TagProtocol : u32 {
    None,
    TypeA, // ISO14443A
@@ -104,6 +113,13 @@ struct TagUuid {
 };
 static_assert(sizeof(TagUuid) == 10, "TagUuid is an invalid size");

+// Calendar date expressed as separate year, month and day fields.
+struct WriteDate {
+    u16 year;
+    u8 month;
+    u8 day;
+};
+// The three fields must pack into exactly 4 bytes.
+static_assert(sizeof(WriteDate) == 0x4, "WriteDate is an invalid size");
+
 struct AmiiboDate {
    u16 raw_date{};

@@ -121,6 +137,21 @@ struct AmiiboDate {
        return static_cast<u8>(GetValue() & 0x001F);
    }

+    // Converts the stored date into a WriteDate. When IsValidDate() rejects the stored value,
+    // 2000-01-01 is returned instead.
+    WriteDate GetWriteDate() const {
+        if (IsValidDate()) {
+            return {
+                .year = GetYear(),
+                .month = GetMonth(),
+                .day = GetDay(),
+            };
+        }
+        // Fallback for dates that fail validation.
+        return {
+            .year = 2000,
+            .month = 1,
+            .day = 1,
+        };
+    }
+
    void SetYear(u16 year) {
        const u16 year_converted = static_cast<u16>((year - 2000) << 9);
        raw_date = Common::swap16((GetValue() & ~0xFE00) | year_converted);
@@ -133,6 +164,13 @@ struct AmiiboDate {
        const u16 day_converted = static_cast<u16>(day);
        raw_date = Common::swap16((GetValue() & ~0x001F) | day_converted);
    }
+
+    // Reports whether the stored date has a year of at least 2000, a month in 1..12 and a day in
+    // 1..31. The day is not checked against the length of the particular month.
+    bool IsValidDate() const {
+        const auto year = GetYear();
+        if (year < 2000) {
+            return false;
+        }
+        const auto month = GetMonth();
+        if (month < 1 || month > 12) {
+            return false;
+        }
+        const auto day = GetDay();
+        return day >= 1 && day <= 31;
+    }
 };
 static_assert(sizeof(AmiiboDate) == 2, "AmiiboDate is an invalid size");

@@ -163,7 +201,7 @@ struct AmiiboModelInfo {
    AmiiboType amiibo_type;
    u16_be model_number;
    AmiiboSeries series;
-    u8 constant_value;         // Must be 02
+    PackedTagType tag_type;
    INSERT_PADDING_BYTES(0x4); // Unknown
 };
 static_assert(sizeof(AmiiboModelInfo) == 0xC, "AmiiboModelInfo is an invalid size");
@@ -250,13 +288,6 @@ struct TagInfo {
 };
 static_assert(sizeof(TagInfo) == 0x58, "TagInfo is an invalid size");

-struct WriteDate {
-    u16 year;
-    u8 month;
-    u8 day;
-};
-static_assert(sizeof(WriteDate) == 0x4, "WriteDate is an invalid size");
-
 struct CommonInfo {
    WriteDate last_write_date;
    u16 write_counter;
--- a/src/core/hle/service/nfp/nfp_user.cpp
+++ b/src/core/hle/service/nfp/nfp_user.cpp
@@ -93,6 +93,18 @@ void IUser::ListDevices(Kernel::HLERequestContext& ctx) {
        return;
    }

+    if (!ctx.CanWriteBuffer()) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(InvalidArgument);
+        return;
+    }
+
+    if (ctx.GetWriteBufferSize() == 0) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(InvalidArgument);
+        return;
+    }
+
    std::vector<u64> nfp_devices;
    const std::size_t max_allowed_devices = ctx.GetWriteBufferSize() / sizeof(u64);

@@ -255,6 +267,12 @@ void IUser::GetApplicationArea(Kernel::HLERequestContext& ctx) {
        return;
    }

+    if (!ctx.CanWriteBuffer()) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(InvalidArgument);
+        return;
+    }
+
    auto device = GetNfpDevice(device_handle);

    if (!device.has_value()) {
@@ -283,6 +301,12 @@ void IUser::SetApplicationArea(Kernel::HLERequestContext& ctx) {
        return;
    }

+    if (!ctx.CanReadBuffer()) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(InvalidArgument);
+        return;
+    }
+
    auto device = GetNfpDevice(device_handle);

    if (!device.has_value()) {
@@ -358,6 +382,12 @@ void IUser::CreateApplicationArea(Kernel::HLERequestContext& ctx) {
        return;
    }

+    if (!ctx.CanReadBuffer()) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(InvalidArgument);
+        return;
+    }
+
    auto device = GetNfpDevice(device_handle);

    if (!device.has_value()) {
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -0,0 +1,50 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/core/nvmap.h"
+#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
+#include "video_core/host1x/host1x.h"
+
+namespace Service::Nvidia::NvCore {
+
+// Private state behind Container: the NvMap instance, the syncpoint manager and the Host1x
+// device file bookkeeping.
+struct ContainerImpl {
+    // Declaration order fixes construction order: file, then manager, then device_file_data.
+    NvMap file;
+    SyncpointManager manager;
+    Container::Host1xDeviceFileData device_file_data{};
+
+    // Both the NvMap instance and the syncpoint manager are constructed from the same Host1x.
+    explicit ContainerImpl(Tegra::Host1x::Host1x& host1x_) : file{host1x_}, manager{host1x_} {}
+};
+
+// Builds the private implementation from the given Host1x instance.
+Container::Container(Tegra::Host1x::Host1x& host1x_)
+    : impl{std::make_unique<ContainerImpl>(host1x_)} {}
+
+// Defaulted in this translation unit, where ContainerImpl is a complete type, so that the
+// std::unique_ptr<ContainerImpl> member can be destroyed.
+Container::~Container() = default;
+
+// Returns the NvMap instance held by the implementation (mutable access).
+NvMap& Container::GetNvMapFile() {
+    return impl->file;
+}
+
+// Returns the NvMap instance held by the implementation (read-only access).
+const NvMap& Container::GetNvMapFile() const {
+    return impl->file;
+}
+
+// Returns the Host1x device file bookkeeping data (mutable access).
+Container::Host1xDeviceFileData& Container::Host1xDeviceFile() {
+    return impl->device_file_data;
+}
+
+// Returns the Host1x device file bookkeeping data (read-only access).
+const Container::Host1xDeviceFileData& Container::Host1xDeviceFile() const {
+    return impl->device_file_data;
+}
+
+// Returns the syncpoint manager held by the implementation (mutable access).
+SyncpointManager& Container::GetSyncpointManager() {
+    return impl->manager;
+}
+
+// Returns the syncpoint manager held by the implementation (read-only access).
+const SyncpointManager& Container::GetSyncpointManager() const {
+    return impl->manager;
+}
+
+} // namespace Service::Nvidia::NvCore
--- a/src/core/hle/service/nvdrv/core/container.h
+++ b/src/core/hle/service/nvdrv/core/container.h
@@ -0,0 +1,52 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <deque>
+#include <memory>
+#include <unordered_map>
+
+#include "core/hle/service/nvdrv/nvdata.h"
+
+namespace Tegra::Host1x {
+class Host1x;
+} // namespace Tegra::Host1x
+
+namespace Service::Nvidia::NvCore {
+
+class NvMap;
+class SyncpointManager;
+
+struct ContainerImpl;
+
+class Container {
+public:
+    explicit Container(Tegra::Host1x::Host1x& host1x);
+    ~Container();
+
+    NvMap& GetNvMapFile();
+
+    const NvMap& GetNvMapFile() const;
+
+    SyncpointManager& GetSyncpointManager();
+
+    const SyncpointManager& GetSyncpointManager() const;
+
+    struct Host1xDeviceFileData {
+        std::unordered_map<DeviceFD, u32> fd_to_id{};
+        std::deque<u32> syncpts_accumulated{};
+        u32 nvdec_next_id{};
+        u32 vic_next_id{};
+    };
+
+    Host1xDeviceFileData& Host1xDeviceFile();
+
+    const Host1xDeviceFileData& Host1xDeviceFile() const;
+
+private:
+    std::unique_ptr<ContainerImpl> impl;
+};
+
+} // namespace Service::Nvidia::NvCore
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -0,0 +1,272 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/hle/service/nvdrv/core/nvmap.h"
+#include "core/memory.h"
+#include "video_core/host1x/host1x.h"
+
+using Core::Memory::YUZU_PAGESIZE;
+
+namespace Service::Nvidia::NvCore {
+NvMap::Handle::Handle(u64 size_, Id id_)
+    : size{size_}, aligned_size{size_}, orig_size{size_}, id{id_} {
+    flags.raw = 0; // start with every flag bit cleared
+}
+
+NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) {
+    std::scoped_lock lock(mutex);
+
+    // A handle may only ever be allocated once
+    if (allocated) {
+        return NvResult::AccessDenied;
+    }
+
+    // Record the requested configuration, raising the alignment to at least one page
+    kind = pKind;
+    flags = pFlags;
+    align = pAlign >= YUZU_PAGESIZE ? pAlign : YUZU_PAGESIZE;
+
+    if (pAddress == 0) {
+        LOG_CRITICAL(Service_NVDRV,
+                     "Mapping nvmap handles without a CPU side address is unimplemented!");
+    } else {
+        // keep_uncached_after_free is cleared whenever a CPU side address was supplied
+        flags.keep_uncached_after_free.Assign(0);
+    }
+
+    address = pAddress;
+    size = Common::AlignUp(size, YUZU_PAGESIZE);
+    aligned_size = Common::AlignUp(size, align);
+    allocated = true;
+    return NvResult::Success;
+}
+
+// Adds one reference to the handle, counted per session kind.
+// Returns BadValue for a handle that has not been allocated yet, Success otherwise.
+NvResult NvMap::Handle::Duplicate(bool internal_session) {
+    std::scoped_lock lock(mutex);
+
+    // Duplication requires memory accounting (in HOS), so unallocated handles are refused
+    if (!allocated) [[unlikely]] {
+        return NvResult::BadValue;
+    }
+
+    // Internal (FromId) references are counted separately from guest ones: without per-process
+    // handle refs, counting both together would make the duplicate tracking inaccurate.
+    s32& counter{internal_session ? internal_dupes : dupes};
+    ++counter;
+
+    return NvResult::Success;
+}
+
+NvMap::NvMap(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {}
+
+void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) {
+    const Handle::Id key{handle_description->id}; // copy the key before giving up ownership
+    std::scoped_lock lock(handles_lock);
+    handles.emplace(key, std::move(handle_description));
+}
+
+void NvMap::UnmapHandle(Handle& handle_description) {
+    // Drop the handle's pending unmap queue entry, if it has one
+    if (auto& queue_entry = handle_description.unmap_queue_entry; queue_entry.has_value()) {
+        unmap_queue.erase(*queue_entry);
+        queue_entry.reset();
+    }
+
+    // Unmap the pinned region from the SMMU, release its address range and mark it unpinned
+    const u32 pinned_address{handle_description.pin_virt_address};
+    const u64 mapped_size{handle_description.aligned_size};
+    host1x.MemoryManager().Unmap(static_cast<GPUVAddr>(pinned_address), mapped_size);
+    host1x.Allocator().Free(pinned_address, static_cast<u32>(mapped_size));
+    handle_description.pin_virt_address = 0;
+}
+
+bool NvMap::TryRemoveHandle(const Handle& handle_description) {
+    // Any remaining guest or internal duplicate keeps the handle registered
+    const bool still_referenced{handle_description.dupes != 0 ||
+                                handle_description.internal_dupes != 0};
+    if (still_referenced) {
+        return false;
+    }
+    // No dupes left, so drop the map's owning reference. Erasing by key does nothing when the
+    // ID is not (or no longer) present, which matches a failed lookup.
+    {
+        std::scoped_lock lock(handles_lock);
+        handles.erase(handle_description.id);
+    }
+    return true;
+}
+
+NvResult NvMap::CreateHandle(u64 size, std::shared_ptr<NvMap::Handle>& result_out) {
+    // Zero-sized handles are rejected
+    if (size == 0) [[unlikely]] {
+        return NvResult::BadValue;
+    }
+
+    // Take the next ID and register a fresh, unallocated handle under it
+    const u32 new_id{next_handle_id.fetch_add(HandleIdIncrement, std::memory_order_relaxed)};
+    result_out = std::make_shared<Handle>(size, new_id);
+    AddHandle(result_out);
+    return NvResult::Success;
+}
+
+std::shared_ptr<NvMap::Handle> NvMap::GetHandle(Handle::Id handle) {
+    std::scoped_lock lock(handles_lock);
+
+    // Unknown IDs are an expected case, so look the entry up with find() instead of paying for
+    // a thrown and caught std::out_of_range on every miss. Returns nullptr when not registered.
+    const auto it{handles.find(handle)};
+    return it != handles.end() ? it->second : nullptr;
+}
+
+VAddr NvMap::GetHandleAddress(Handle::Id handle) {
+    std::scoped_lock lock(handles_lock);
+
+    // A handle that is not registered yields address 0. find() avoids using exceptions as
+    // control flow; the address is still read while holding only `handles_lock`.
+    const auto it{handles.find(handle)};
+    return it != handles.end() ? it->second->address : 0;
+}
+
+u32 NvMap::PinHandle(NvMap::Handle::Id handle) {
+    auto handle_description{GetHandle(handle)};
+    if (!handle_description) [[unlikely]] {
+        return 0;
+    }
+
+    std::scoped_lock lock(handle_description->mutex);
+    if (!handle_description->pins) {
+        // If we're in the unmap queue we can just remove ourselves and return since we're already
+        // mapped
+        {
+            // Lock now to prevent our queue entry from being removed for allocation in-between the
+            // following check and erase
+            std::scoped_lock queueLock(unmap_queue_lock);
+            if (handle_description->unmap_queue_entry) {
+                unmap_queue.erase(*handle_description->unmap_queue_entry);
+                handle_description->unmap_queue_entry.reset();
+
+                handle_description->pins++;
+                return handle_description->pin_virt_address;
+            }
+        }
+
+        // If not then allocate some space and map it
+        u32 address{};
+        auto& smmu_allocator = host1x.Allocator();
+        auto& smmu_memory_manager = host1x.MemoryManager();
+        while (!(address =
+                     smmu_allocator.Allocate(static_cast<u32>(handle_description->aligned_size)))) {
+            // Evict the oldest unpinned handle and retry until the allocation succeeds
+            std::scoped_lock queueLock(unmap_queue_lock);
+            if (unmap_queue.empty()) {
+                // Nothing left to evict: front() would be UB and retrying could never succeed
+                LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");
+                return 0;
+            }
+            // Queued handles are unpinned but still mapped; UnmapHandle also dequeues the entry
+            auto freeHandleDesc{unmap_queue.front()};
+            std::scoped_lock freeLock(freeHandleDesc->mutex);
+            UnmapHandle(*freeHandleDesc);
+        }
+
+        smmu_memory_manager.Map(static_cast<GPUVAddr>(address), handle_description->address,
+                                handle_description->aligned_size);
+        handle_description->pin_virt_address = address;
+    }
+
+    handle_description->pins++;
+    return handle_description->pin_virt_address;
+}
+
+void NvMap::UnpinHandle(Handle::Id handle) {
+    const auto handle_description{GetHandle(handle)};
+    if (handle_description == nullptr) {
+        return;
+    }
+
+    std::scoped_lock lock(handle_description->mutex);
+    const s32 remaining_pins{--handle_description->pins};
+    if (remaining_pins < 0) {
+        LOG_WARNING(Service_NVDRV, "Pin count imbalance detected!");
+    } else if (remaining_pins == 0) {
+        // Last pin released: queue the handle so its SMMU mapping can be reclaimed if needed
+        std::scoped_lock queueLock(unmap_queue_lock);
+        unmap_queue.push_back(handle_description);
+        handle_description->unmap_queue_entry = std::prev(unmap_queue.end());
+    }
+}
+
+void NvMap::DuplicateHandle(Handle::Id handle, bool internal_session) {
+    const auto handle_description{GetHandle(handle)};
+    if (handle_description == nullptr) {
+        LOG_CRITICAL(Service_NVDRV, "Unregistered handle!");
+        return;
+    }
+
+    // Failures are only logged; this function has no way of reporting them to the caller
+    if (handle_description->Duplicate(internal_session) != NvResult::Success) {
+        LOG_CRITICAL(Service_NVDRV, "Could not duplicate handle!");
+    }
+}
+
+std::optional<NvMap::FreeInfo> NvMap::FreeHandle(Handle::Id handle, bool internal_session) {
+    std::weak_ptr<Handle> hWeak{GetHandle(handle)};
+    FreeInfo freeInfo;
+
+    // Only a weak_ptr outlives the block below, so checking expired() afterwards tells us whether
+    // the handle object was actually destroyed; that is what gets reported back to the guest
+    if (auto handle_description = hWeak.lock()) {
+        std::scoped_lock lock(handle_description->mutex);
+
+        if (internal_session) {
+            if (--handle_description->internal_dupes < 0)
+                LOG_WARNING(Service_NVDRV, "Internal duplicate count imbalance detected!");
+        } else {
+            if (--handle_description->dupes < 0) {
+                LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!");
+            } else if (handle_description->dupes == 0) {
+                // Last guest duplicate dropped: force unmap the handle and reset its pin count
+                if (handle_description->pin_virt_address) {
+                    std::scoped_lock queueLock(unmap_queue_lock);
+                    UnmapHandle(*handle_description);
+                }
+
+                handle_description->pins = 0;
+            }
+        }
+
+        // Try to remove the shared ptr to the handle from the map, if nothing else is using the
+        // handle then it will now be freed when `handle_description` goes out of scope
+        if (TryRemoveHandle(*handle_description)) {
+            LOG_DEBUG(Service_NVDRV, "Removed nvmap handle: {}", handle);
+        } else {
+            LOG_DEBUG(Service_NVDRV,
+                      "Tried to free nvmap handle: {} but didn't as it still has duplicates",
+                      handle);
+        }
+
+        freeInfo = {
+            .address = handle_description->address,
+            .size = handle_description->size,
+            .was_uncached = handle_description->flags.map_uncached.Value() != 0,
+        };
+    } else {
+        return std::nullopt;
+    }
+
+    // Something still owns the handle, so it wasn't freed: report address 0 to signal that
+    if (!hWeak.expired()) {
+        LOG_DEBUG(Service_NVDRV, "nvmap handle: {} wasn't freed as it is still in use", handle);
+        freeInfo.address = 0;
+    }
+
+    return freeInfo;
+}
+
+} // namespace Service::Nvidia::NvCore
--- a/src/core/hle/service/nvdrv/core/nvmap.h
+++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -0,0 +1,175 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <atomic>
+#include <list>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <unordered_map>
+#include <assert.h>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "core/hle/service/nvdrv/nvdata.h"
+
+namespace Tegra {
+
+namespace Host1x {
+class Host1x;
+} // namespace Host1x
+
+} // namespace Tegra
+
+namespace Service::Nvidia::NvCore {
+/**
+ * @brief The nvmap core class holds the global state for nvmap and provides methods to manage
+ * handles
+ */
+class NvMap {
+public:
+    /**
+     * @brief A handle to a contiguous block of memory in an application's address space
+     */
+    struct Handle {
+        std::mutex mutex;
+
+        u64 align{};      //!< The alignment to use when pinning the handle onto the SMMU
+        u64 size;         //!< Page-aligned size of the memory the handle refers to
+        u64 aligned_size; //!< `align`-aligned size of the memory the handle refers to
+        u64 orig_size;    //!< Original unaligned size of the memory this handle refers to
+
+        s32 dupes{1};          //!< How many guest references there are to this handle
+        s32 internal_dupes{0}; //!< How many emulator-internal references there are to this handle
+
+        using Id = u32;
+        Id id; //!< A globally unique identifier for this handle
+
+        s32 pins{};
+        u32 pin_virt_address{};
+        std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{};
+
+        union Flags {
+            u32 raw;
+            BitField<0, 1, u32> map_uncached; //!< If the handle should be mapped as uncached
+            BitField<2, 1, u32> keep_uncached_after_free; //!< Only applicable when the handle was
+                                                          //!< allocated with a fixed address
+            BitField<4, 1, u32> _unk0_;                   //!< Passed to IOVMM for pins
+        } flags{};
+        static_assert(sizeof(Flags) == sizeof(u32));
+
+        u64 address{}; //!< The memory location in the guest's AS that this handle corresponds to,
+                       //!< this can also be in the nvdrv tmem
+        bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC
+                                     //!< call
+
+        u8 kind{};        //!< Used for memory compression
+        bool allocated{}; //!< If the handle has been allocated with `Alloc`
+
+        u64 dma_map_addr{}; //! remove me after implementing pinning.
+
+        Handle(u64 size, Id id);
+
+        /**
+         * @brief Sets up the handle with the given memory config, can allocate memory from the tmem
+         * if a 0 address is passed
+         */
+        [[nodiscard]] NvResult Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress);
+
+        /**
+         * @brief Increases the dupe counter of the handle for the given session
+         */
+        [[nodiscard]] NvResult Duplicate(bool internal_session);
+
+        /**
+         * @brief Obtains a pointer to the handle's memory and marks the handle as having been
+         * mapped
+         */
+        u8* GetPointer() {
+            if (!address) {
+                return nullptr;
+            }
+
+            is_shared_mem_mapped = true;
+            return reinterpret_cast<u8*>(address);
+        }
+    };
+
+    /**
+     * @brief Encapsulates the result of a FreeHandle operation
+     */
+    struct FreeInfo {
+        u64 address;       //!< Address the handle referred to before deletion
+        u64 size;          //!< Page-aligned handle size
+        bool was_uncached; //!< If the handle was allocated as uncached
+    };
+
+    explicit NvMap(Tegra::Host1x::Host1x& host1x);
+
+    /**
+     * @brief Creates an unallocated handle of the given size
+     */
+    [[nodiscard]] NvResult CreateHandle(u64 size, std::shared_ptr<NvMap::Handle>& result_out);
+
+    std::shared_ptr<Handle> GetHandle(Handle::Id handle);
+
+    VAddr GetHandleAddress(Handle::Id handle);
+
+    /**
+     * @brief Maps a handle into the SMMU address space
+     * @note This operation is refcounted, the number of calls to this must eventually match the
+     * number of calls to `UnpinHandle`
+     * @return The SMMU virtual address that the handle has been mapped to
+     */
+    u32 PinHandle(Handle::Id handle);
+
+    /**
+     * @brief When this has been called an equal number of times to `PinHandle` for the supplied
+     * handle it will be added to a list of handles to be freed when necessary
+     */
+    void UnpinHandle(Handle::Id handle);
+
+    /**
+     * @brief Tries to duplicate a handle
+     */
+    void DuplicateHandle(Handle::Id handle, bool internal_session = false);
+
+    /**
+     * @brief Tries to free a handle and remove a single dupe
+     * @note If a handle has no dupes left and has no other users a FreeInfo struct will be returned
+     * describing the prior state of the handle
+     */
+    std::optional<FreeInfo> FreeHandle(Handle::Id handle, bool internal_session);
+
+private:
+    std::list<std::shared_ptr<Handle>> unmap_queue{};
+    std::mutex unmap_queue_lock{}; //!< Protects access to `unmap_queue`
+
+    std::unordered_map<Handle::Id, std::shared_ptr<Handle>>
+        handles{};           //!< Main owning map of handles
+    std::mutex handles_lock; //!< Protects access to `handles`
+
+    static constexpr u32 HandleIdIncrement{
+        4}; //!< Each new handle ID is an increment of 4 from the previous
+    std::atomic<u32> next_handle_id{HandleIdIncrement};
+    Tegra::Host1x::Host1x& host1x;
+
+    void AddHandle(std::shared_ptr<Handle> handle);
+
+    /**
+     * @brief Unmaps and frees the SMMU memory region a handle is mapped to
+     * @note Both `unmap_queue_lock` and `handle_description.mutex` MUST be locked when calling this
+     */
+    void UnmapHandle(Handle& handle_description);
+
+    /**
+     * @brief Removes a handle from the map taking its dupes into account
+     * @note handle_description.mutex MUST be locked when calling this
+     * @return If the handle was removed from the map
+     */
+    bool TryRemoveHandle(const Handle& handle_description);
+};
+} // namespace Service::Nvidia::NvCore
--- a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp
+++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp
@@ -0,0 +1,121 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common/assert.h"
+#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
+#include "video_core/host1x/host1x.h"
+
+namespace Service::Nvidia::NvCore {
+
+SyncpointManager::SyncpointManager(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {
+    // Both vblank syncpoints use Continuous Mode, so reserve them as client managed
+    // Refer to section 14.3.5.3 of the TRM for more information on Continuous Mode
+    // https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/8f74a72394efb871cb3f886a3de2998cd7ff2990/drivers/gpu/host1x/drm/dc.c#L660
+    constexpr u32 VBlank0SyncpointId{26};
+    constexpr u32 VBlank1SyncpointId{27};
+    ReserveSyncpoint(VBlank0SyncpointId, true);
+    ReserveSyncpoint(VBlank1SyncpointId, true);
+
+    // Reserve the fixed per-channel syncpoints; table entries of 0 are skipped
+    for (const u32 syncpoint_id : channel_syncpoints) {
+        if (syncpoint_id != 0) {
+            ReserveSyncpoint(syncpoint_id, false);
+        }
+    }
+}
+
+SyncpointManager::~SyncpointManager() = default;
+
+u32 SyncpointManager::ReserveSyncpoint(u32 id, bool client_managed) {
+    auto& syncpoint = syncpoints.at(id);
+    if (syncpoint.reserved) {
+        ASSERT_MSG(false, "Requested syncpoint is in use");
+        return 0;
+    }
+    // Mark the slot as taken and record whether a client interface manages it
+    syncpoint.reserved = true;
+    syncpoint.interface_managed = client_managed;
+    return id;
+}
+
+u32 SyncpointManager::FindFreeSyncpoint() {
+    u32 id{1}; // scanning starts at 1, so 0 can serve as the "nothing free" result
+    while (id < syncpoints.size() && syncpoints[id].reserved) {
+        ++id;
+    }
+    const bool found{id < syncpoints.size()};
+    ASSERT_MSG(found, "Failed to find a free syncpoint!");
+    return found ? id : 0;
+}
+
+u32 SyncpointManager::AllocateSyncpoint(bool client_managed) {
+    std::scoped_lock guard(reservation_lock); // held across both the search and the reservation
+    return ReserveSyncpoint(FindFreeSyncpoint(), client_managed);
+}
+
+void SyncpointManager::FreeSyncpoint(u32 id) {
+    std::scoped_lock guard(reservation_lock);
+    ASSERT(syncpoints.at(id).reserved); // only a reserved syncpoint is expected here
+    syncpoints.at(id).reserved = false;
+}
+
+bool SyncpointManager::IsSyncpointAllocated(u32 id) {
+    return (id < SyncpointCount) && syncpoints[id].reserved; // valid IDs: 0..SyncpointCount-1
+}
+
+bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) const {
+    const SyncpointInfo& info{syncpoints.at(id)};
+    if (!info.reserved) {
+        ASSERT(false);
+        return false;
+    }
+
+    // Distance of the cached minimum from the threshold, in wrapping u32 arithmetic
+    const u32 min_delta{info.counter_min - threshold};
+    if (info.interface_managed) {
+        // The interface sanity-checks the values itself, so no maximum is tracked for these
+        return static_cast<s32>(min_delta) >= 0;
+    }
+    // Otherwise compare the maximum and minimum distances from the threshold
+    return (info.counter_max - threshold) >= min_delta;
+}
+
+u32 SyncpointManager::IncrementSyncpointMaxExt(u32 id, u32 amount) {
+    auto& syncpoint = syncpoints.at(id);
+    if (!syncpoint.reserved) {
+        ASSERT(false);
+        return 0;
+    }
+    return syncpoint.counter_max += amount; // atomic add, yields the new maximum
+}
+
+u32 SyncpointManager::ReadSyncpointMinValue(u32 id) {
+    const auto& syncpoint = syncpoints.at(id);
+    if (!syncpoint.reserved) {
+        ASSERT(false);
+        return 0;
+    }
+    return syncpoint.counter_min; // cached value; host1x is not queried here
+}
+
+u32 SyncpointManager::UpdateMin(u32 id) {
+    auto& syncpoint = syncpoints.at(id);
+    if (!syncpoint.reserved) {
+        ASSERT(false);
+        return 0;
+    }
+    syncpoint.counter_min = host1x.GetSyncpointManager().GetHostSyncpointValue(id);
+    return syncpoint.counter_min;
+}
+
+NvFence SyncpointManager::GetSyncpointFence(u32 id) {
+    const auto& syncpoint = syncpoints.at(id);
+    if (!syncpoint.reserved) {
+        ASSERT(false);
+        return NvFence{};
+    }
+    return {.id = static_cast<s32>(id), .value = syncpoint.counter_max};
+}
+
+} // namespace Service::Nvidia::NvCore
--- a/src/core/hle/service/nvdrv/core/syncpoint_manager.h
+++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.h
@@ -0,0 +1,134 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <mutex>
+
+#include "common/common_types.h"
+#include "core/hle/service/nvdrv/nvdata.h"
+
+namespace Tegra::Host1x {
+class Host1x;
+} // namespace Tegra::Host1x
+
+namespace Service::Nvidia::NvCore {
+
+enum class ChannelType : u32 {
+    MsEnc = 0,
+    VIC = 1,
+    GPU = 2,
+    NvDec = 3,
+    Display = 4,
+    NvJpg = 5,
+    TSec = 6,
+    Max = 7
+};
+
+/**
+ * @brief SyncpointManager handles allocating and accessing host1x syncpoints, these are cached
+ * versions of the HW syncpoints which are intermittently synced
+ * @note Refer to Chapter 14 of the Tegra X1 TRM for an exhaustive overview of them
+ * @url https://http.download.nvidia.com/tegra-public-appnotes/host1x.html
+ * @url
+ * https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/jetson-tx1/drivers/video/tegra/host/nvhost_syncpt.c
+ */
+class SyncpointManager final {
+public:
+    explicit SyncpointManager(Tegra::Host1x::Host1x& host1x);
+    ~SyncpointManager();
+
+    /**
+     * @brief Checks if the given syncpoint is both allocated and below the number of HW syncpoints
+     */
+    bool IsSyncpointAllocated(u32 id);
+
+    /**
+     * @brief Finds a free syncpoint and reserves it
+     * @return The ID of the reserved syncpoint
+     */
+    u32 AllocateSyncpoint(bool client_managed);
+
+    /**
+     * @url
+     * https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/8f74a72394efb871cb3f886a3de2998cd7ff2990/drivers/gpu/host1x/syncpt.c#L259
+     */
+    bool HasSyncpointExpired(u32 id, u32 threshold) const;
+
+    bool IsFenceSignalled(NvFence fence) const {
+        return HasSyncpointExpired(fence.id, fence.value);
+    }
+
+    /**
+     * @brief Atomically increments the maximum value of a syncpoint by the given amount
+     * @return The new max value of the syncpoint
+     */
+    u32 IncrementSyncpointMaxExt(u32 id, u32 amount);
+
+    /**
+     * @return The minimum value of the syncpoint
+     */
+    u32 ReadSyncpointMinValue(u32 id);
+
+    /**
+     * @brief Synchronises the minimum value of the syncpoint with the GPU
+     * @return The new minimum value of the syncpoint
+     */
+    u32 UpdateMin(u32 id);
+
+    /**
+     * @brief Frees the usage of a syncpoint.
+     */
+    void FreeSyncpoint(u32 id);
+
+    /**
+     * @return A fence that will be signalled once this syncpoint hits its maximum value
+     */
+    NvFence GetSyncpointFence(u32 id);
+
+    static constexpr std::array<u32, static_cast<u32>(ChannelType::Max)> channel_syncpoints{
+        0x0,  // `MsEnc` is unimplemented
+        0xC,  // `VIC`
+        0x0,  // `GPU` syncpoints are allocated per-channel instead
+        0x36, // `NvDec`
+        0x0,  // `Display` is unimplemented
+        0x37, // `NvJpg`
+        0x0,  // `TSec` is unimplemented
+    };        //!< Maps each channel ID to a constant syncpoint
+
+private:
+    /**
+     * @note reservation_lock should be locked when calling this
+     */
+    u32 ReserveSyncpoint(u32 id, bool client_managed);
+
+    /**
+     * @return The ID of the first free syncpoint
+     */
+    u32 FindFreeSyncpoint();
+
+    struct SyncpointInfo {
+        std::atomic<u32> counter_min; //!< The least value the syncpoint can be (The value it was
+                                      //!< when it was last synchronized with host1x)
+        std::atomic<u32> counter_max; //!< The maximum value the syncpoint can reach according to
+                                      //!< the current usage
+        bool interface_managed; //!< If the syncpoint is managed by a host1x client interface, a
+                                //!< client interface is a HW block that can handle host1x
+                                //!< transactions on behalf of a host1x client (Which would
+                                //!< otherwise need to be manually synced using PIO which is
+                                //!< synchronous and requires direct cooperation of the CPU)
+        bool reserved; //!< If the syncpoint is reserved or not, not to be confused with a reserved
+                       //!< value
+    };
+
+    constexpr static std::size_t SyncpointCount{192};
+    std::array<SyncpointInfo, SyncpointCount> syncpoints{};
+    std::mutex reservation_lock;
+
+    Tegra::Host1x::Host1x& host1x;
+};
+
+} // namespace Service::Nvidia::NvCore
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -11,6 +11,10 @@ namespace Core {
 class System;
 }

+namespace Kernel {
+class KEvent;
+}
+
 namespace Service::Nvidia::Devices {

 /// Represents an abstract nvidia device node. It is to be subclassed by concrete device nodes to
@@ -64,6 +68,10 @@ public:
     */
    virtual void OnClose(DeviceFD fd) = 0;

+    virtual Kernel::KEvent* QueryEvent(u32 event_id) {
+        return nullptr;
+    }
+
 protected:
    Core::System& system;
 };
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -5,15 +5,16 @@
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/core_timing.h"
+#include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
-#include "core/hle/service/nvdrv/devices/nvmap.h"
 #include "core/perf_stats.h"
 #include "video_core/gpu.h"

 namespace Service::Nvidia::Devices {

-nvdisp_disp0::nvdisp_disp0(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_)
-    : nvdevice{system_}, nvmap_dev{std::move(nvmap_dev_)} {}
+nvdisp_disp0::nvdisp_disp0(Core::System& system_, NvCore::Container& core)
+    : nvdevice{system_}, container{core}, nvmap{core.GetNvMapFile()} {}
 nvdisp_disp0::~nvdisp_disp0() = default;

 NvResult nvdisp_disp0::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -39,8 +40,9 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}

 void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width,
                        u32 height, u32 stride, android::BufferTransformFlags transform,
-                        const Common::Rectangle<int>& crop_rect) {
-    const VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
+                        const Common::Rectangle<int>& crop_rect,
+                        std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) {
+    const VAddr addr = nvmap.GetHandleAddress(buffer_handle);
    LOG_TRACE(Service,
              "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
              addr, offset, width, height, stride, format);
@@ -48,10 +50,15 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form
    const Tegra::FramebufferConfig framebuffer{addr,   offset, width,     height,
                                               stride, format, transform, crop_rect};

+    system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences);
    system.GetPerfStats().EndSystemFrame();
-    system.GPU().SwapBuffers(&framebuffer);
    system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
    system.GetPerfStats().BeginSystemFrame();
 }

+Kernel::KEvent* nvdisp_disp0::QueryEvent(u32 event_id) {
+    LOG_CRITICAL(Service_NVDRV, "Unknown DISP Event {}", event_id);
+    return nullptr;
+}
+
 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -11,13 +11,18 @@
 #include "core/hle/service/nvflinger/buffer_transform_flags.h"
 #include "core/hle/service/nvflinger/pixel_format.h"

+namespace Service::Nvidia::NvCore {
+class Container;
+class NvMap;
+} // namespace Service::Nvidia::NvCore
+
 namespace Service::Nvidia::Devices {

 class nvmap;

 class nvdisp_disp0 final : public nvdevice {
 public:
-    explicit nvdisp_disp0(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_);
+    explicit nvdisp_disp0(Core::System& system_, NvCore::Container& core);
    ~nvdisp_disp0() override;

    NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -33,10 +38,14 @@ public:
    /// Performs a screen flip, drawing the buffer pointed to by the handle.
    void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height,
              u32 stride, android::BufferTransformFlags transform,
-              const Common::Rectangle<int>& crop_rect);
+              const Common::Rectangle<int>& crop_rect,
+              std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences);
+
+    Kernel::KEvent* QueryEvent(u32 event_id) override;

 private:
-    std::shared_ptr<nvmap> nvmap_dev;
+    NvCore::Container& container;
+    NvCore::NvMap& nvmap;
 };

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -1,21 +1,30 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later

 #include <cstring>
 #include <utility>

+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
-#include "core/hle/service/nvdrv/devices/nvmap.h"
+#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+#include "core/hle/service/nvdrv/nvdrv.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"

 namespace Service::Nvidia::Devices {

-nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_)
-    : nvdevice{system_}, nvmap_dev{std::move(nvmap_dev_)} {}
+nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
+    : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, vm{},
+      gmmu{} {}
+
 nvhost_as_gpu::~nvhost_as_gpu() = default;

 NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -82,12 +91,52 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
    IoctlAllocAsEx params{};
    std::memcpy(&params, input.data(), input.size());

-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, big_page_size=0x{:X}", params.big_page_size);
-    if (params.big_page_size == 0) {
-        params.big_page_size = DEFAULT_BIG_PAGE_SIZE;
+    LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size);
+
+    std::scoped_lock lock(mutex);
+
+    if (vm.initialised) {
+        ASSERT_MSG(false, "Cannot initialise an address space twice!");
+        return NvResult::InvalidState;
    }

-    big_page_size = params.big_page_size;
+    if (params.big_page_size) {
+        if (!std::has_single_bit(params.big_page_size)) {
+            LOG_ERROR(Service_NVDRV, "Non power-of-2 big page size: 0x{:X}!", params.big_page_size);
+            return NvResult::BadValue;
+        }
+
+        if ((params.big_page_size & VM::SUPPORTED_BIG_PAGE_SIZES) == 0) {
+            LOG_ERROR(Service_NVDRV, "Unsupported big page size: 0x{:X}!", params.big_page_size);
+            return NvResult::BadValue;
+        }
+
+        vm.big_page_size = params.big_page_size;
+        vm.big_page_size_bits = static_cast<u32>(std::countr_zero(params.big_page_size));
+
+        vm.va_range_start = params.big_page_size << VM::VA_START_SHIFT;
+    }
+
+    // If this is unspecified then default values should be used
+    if (params.va_range_start) {
+        vm.va_range_start = params.va_range_start;
+        vm.va_range_split = params.va_range_split;
+        vm.va_range_end = params.va_range_end;
+    }
+
+    const auto start_pages{static_cast<u32>(vm.va_range_start >> VM::PAGE_SIZE_BITS)};
+    const auto end_pages{static_cast<u32>(vm.va_range_split >> VM::PAGE_SIZE_BITS)};
+    vm.small_page_allocator = std::make_shared<VM::Allocator>(start_pages, end_pages);
+
+    const auto start_big_pages{static_cast<u32>(vm.va_range_split >> vm.big_page_size_bits)};
+    const auto end_big_pages{
+        static_cast<u32>((vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits)};
+    vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages);
+
+    gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, vm.big_page_size_bits,
+                                                  VM::PAGE_SIZE_BITS);
+    system.GPU().InitAddressSpace(*gmmu);
+    vm.initialised = true;

    return NvResult::Success;
 }
@@ -99,21 +148,76 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
    LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages,
              params.page_size, params.flags);

-    const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
-    if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) {
-        params.offset = *system.GPU().MemoryManager().AllocateFixed(params.offset, size);
-    } else {
-        params.offset = system.GPU().MemoryManager().Allocate(size, params.align);
+    std::scoped_lock lock(mutex);
+
+    if (!vm.initialised) {
+        return NvResult::BadValue;
    }

-    auto result = NvResult::Success;
-    if (!params.offset) {
-        LOG_CRITICAL(Service_NVDRV, "allocation failed for size {}", size);
-        result = NvResult::InsufficientMemory;
+    if (params.page_size != VM::YUZU_PAGESIZE && params.page_size != vm.big_page_size) {
+        return NvResult::BadValue;
    }

+    if (params.page_size != vm.big_page_size &&
+        ((params.flags & MappingFlags::Sparse) != MappingFlags::None)) {
+        UNIMPLEMENTED_MSG("Sparse small pages are not implemented!");
+        return NvResult::NotImplemented;
+    }
+
+    const u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS
+                                                                   : vm.big_page_size_bits};
+
+    auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator
+                                                          : *vm.big_page_allocator};
+
+    if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
+        allocator.AllocateFixed(static_cast<u32>(params.offset >> page_size_bits), params.pages);
+    } else {
+        params.offset = static_cast<u64>(allocator.Allocate(params.pages)) << page_size_bits;
+        if (!params.offset) {
+            ASSERT_MSG(false, "Failed to allocate free space in the GPU AS!");
+            return NvResult::InsufficientMemory;
+        }
+    }
+
+    u64 size{static_cast<u64>(params.pages) * params.page_size};
+
+    if ((params.flags & MappingFlags::Sparse) != MappingFlags::None) {
+        gmmu->MapSparse(params.offset, size);
+    }
+
+    allocation_map[params.offset] = {
+        .size = size,
+        .mappings{},
+        .page_size = params.page_size,
+        .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None,
+        .big_pages = params.page_size != VM::YUZU_PAGESIZE,
+    };
+
    std::memcpy(output.data(), &params, output.size());
-    return result;
+    return NvResult::Success;
+}
+
+void nvhost_as_gpu::FreeMappingLocked(u64 offset) { // FreeSpace calls this with `mutex` held
+    auto mapping{mapping_map.at(offset)}; // Throws std::out_of_range if nothing is mapped here
+
+    if (!mapping->fixed) { // Non-fixed mappings got their pages from an allocator in MapBufferEx
+        auto& allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
+        u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
+        // NOTE(review): MapBufferEx allocates AlignUp(size) pages, this frees size >> bits - verify
+        allocator.Free(static_cast<u32>(mapping->offset >> page_size_bits),
+                       static_cast<u32>(mapping->size >> page_size_bits));
+    }
+
+    // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
+    // Only FreeSpace can unmap them fully
+    if (mapping->sparse_alloc) {
+        gmmu->MapSparse(offset, mapping->size, mapping->big_page);
+    } else {
+        gmmu->Unmap(offset, mapping->size);
+    }
+
+    mapping_map.erase(offset); // Forget the mapping once the GPU side has been updated
 }

 NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -123,8 +227,40 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>&
    LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset,
              params.pages, params.page_size);

-    system.GPU().MemoryManager().Unmap(params.offset,
-                                       static_cast<std::size_t>(params.pages) * params.page_size);
+    std::scoped_lock lock(mutex);
+
+    if (!vm.initialised) {
+        return NvResult::BadValue;
+    }
+
+    try {
+        auto allocation{allocation_map.at(params.offset)}; // Throws if offset was never allocated
+        // The request has to describe exactly the region recorded by AllocateSpace
+        if (allocation.page_size != params.page_size ||
+            allocation.size != (static_cast<u64>(params.pages) * params.page_size)) {
+            return NvResult::BadValue;
+        }
+        // NOTE(review): mappings already removed via UnmapBuffer stay listed and would throw here
+        for (const auto& mapping : allocation.mappings) {
+            FreeMappingLocked(mapping->offset);
+        }
+
+        // Unset sparse flag if required
+        if (allocation.sparse) {
+            gmmu->Unmap(params.offset, allocation.size);
+        }
+
+        auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator
+                                                              : *vm.big_page_allocator};
+        u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS
+                                                                 : vm.big_page_size_bits};
+
+        allocator.Free(static_cast<u32>(params.offset >> page_size_bits),
+                       static_cast<u32>(allocation.size >> page_size_bits));
+        allocation_map.erase(params.offset);
+    } catch (const std::out_of_range&) {
+        return NvResult::BadValue;
+    }

    std::memcpy(output.data(), &params, output.size());
    return NvResult::Success;
@@ -135,35 +271,52 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out

    LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries);

-    auto result = NvResult::Success;
    std::vector<IoctlRemapEntry> entries(num_entries);
    std::memcpy(entries.data(), input.data(), input.size());

-    for (const auto& entry : entries) {
-        LOG_DEBUG(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
-                  entry.offset, entry.nvmap_handle, entry.pages);
+    std::scoped_lock lock(mutex);

-        const auto object{nvmap_dev->GetObject(entry.nvmap_handle)};
-        if (!object) {
-            LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", entry.nvmap_handle);
-            result = NvResult::InvalidState;
-            break;
+    if (!vm.initialised) {
+        return NvResult::BadValue;
+    }
+
+    for (const auto& entry : entries) {
+        GPUVAddr virtual_address{static_cast<u64>(entry.as_offset_big_pages)
+                                 << vm.big_page_size_bits};
+        u64 size{static_cast<u64>(entry.big_pages) << vm.big_page_size_bits};
+
+        auto alloc{allocation_map.upper_bound(virtual_address)};
+
+        if (alloc-- == allocation_map.begin() ||
+            (virtual_address - alloc->first) + size > alloc->second.size) {
+            LOG_WARNING(Service_NVDRV, "Cannot remap into an unallocated region!");
+            return NvResult::BadValue;
        }

-        const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
-        const auto size{static_cast<u64>(entry.pages) << 0x10};
-        const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10};
-        const auto addr{system.GPU().MemoryManager().Map(object->addr + map_offset, offset, size)};
+        if (!alloc->second.sparse) {
+            LOG_WARNING(Service_NVDRV, "Cannot remap a non-sparse mapping!");
+            return NvResult::BadValue;
+        }

-        if (!addr) {
-            LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!");
-            result = NvResult::InvalidState;
-            break;
+        const bool use_big_pages = alloc->second.big_pages;
+        if (!entry.handle) {
+            gmmu->MapSparse(virtual_address, size, use_big_pages);
+        } else {
+            auto handle{nvmap.GetHandle(entry.handle)};
+            if (!handle) {
+                return NvResult::BadValue;
+            }
+
+            VAddr cpu_address{static_cast<VAddr>(
+                handle->address +
+                (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
+
+            gmmu->Map(virtual_address, cpu_address, size, use_big_pages);
        }
    }

    std::memcpy(output.data(), entries.data(), output.size());
-    return result;
+    return NvResult::Success;
 }

 NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -173,79 +326,98 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
    LOG_DEBUG(Service_NVDRV,
              "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}"
              ", offset={}",
-              params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size,
+              params.flags, params.handle, params.buffer_offset, params.mapping_size,
              params.offset);

-    const auto object{nvmap_dev->GetObject(params.nvmap_handle)};
-    if (!object) {
-        LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", params.nvmap_handle);
-        std::memcpy(output.data(), &params, output.size());
-        return NvResult::InvalidState;
+    std::scoped_lock lock(mutex);
+
+    if (!vm.initialised) {
+        return NvResult::BadValue;
    }

-    // The real nvservices doesn't make a distinction between handles and ids, and
-    // object can only have one handle and it will be the same as its id. Assert that this is the
-    // case to prevent unexpected behavior.
-    ASSERT(object->id == params.nvmap_handle);
-    auto& gpu = system.GPU();
+    // Remaps a subregion of an existing mapping to a different PA
+    if ((params.flags & MappingFlags::Remap) != MappingFlags::None) {
+        try {
+            auto mapping{mapping_map.at(params.offset)};

-    u64 page_size{params.page_size};
-    if (!page_size) {
-        page_size = object->align;
-    }
-
-    if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) {
-        if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) {
-            const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)};
-            const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)};
-
-            if (!gpu.MemoryManager().Map(cpu_addr, gpu_addr, params.mapping_size)) {
-                LOG_CRITICAL(Service_NVDRV,
-                             "remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, "
-                             "mapping_size = {}, offset={}",
-                             params.flags, params.nvmap_handle, params.buffer_offset,
-                             params.mapping_size, params.offset);
-
-                std::memcpy(output.data(), &params, output.size());
-                return NvResult::InvalidState;
+            if (mapping->size < params.mapping_size) {
+                LOG_WARNING(Service_NVDRV,
+                            "Cannot remap a partially mapped GPU address space region: 0x{:X}",
+                            params.offset);
+                return NvResult::BadValue;
            }

-            std::memcpy(output.data(), &params, output.size());
-            return NvResult::Success;
-        } else {
-            LOG_CRITICAL(Service_NVDRV, "address not mapped offset={}", params.offset);
+            u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
+            VAddr cpu_address{mapping->ptr + params.buffer_offset};

-            std::memcpy(output.data(), &params, output.size());
-            return NvResult::InvalidState;
+            gmmu->Map(gpu_address, cpu_address, params.mapping_size, mapping->big_page);
+
+            return NvResult::Success;
+        } catch (const std::out_of_range&) {
+            LOG_WARNING(Service_NVDRV, "Cannot remap an unmapped GPU address space region: 0x{:X}",
+                        params.offset);
+            return NvResult::BadValue;
        }
    }

-    // We can only map objects that have already been assigned a CPU address.
-    ASSERT(object->status == nvmap::Object::Status::Allocated);
-
-    const auto physical_address{object->addr + params.buffer_offset};
-    u64 size{params.mapping_size};
-    if (!size) {
-        size = object->size;
+    auto handle{nvmap.GetHandle(params.handle)};
+    if (!handle) {
+        return NvResult::BadValue;
    }

-    const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None};
-    if (is_alloc) {
-        params.offset = gpu.MemoryManager().MapAllocate(physical_address, size, page_size);
-    } else {
-        params.offset = gpu.MemoryManager().Map(physical_address, params.offset, size);
-    }
+    VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)};
+    u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};

-    auto result = NvResult::Success;
-    if (!params.offset) {
-        LOG_CRITICAL(Service_NVDRV, "failed to map size={}", size);
-        result = NvResult::InvalidState;
+    bool big_page{[&]() {
+        if (Common::IsAligned(handle->align, vm.big_page_size)) {
+            return true;
+        } else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE)) {
+            return false;
+        } else {
+            ASSERT(false);
+            return false;
+        }
+    }()};
+
+    if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
+        auto alloc{allocation_map.upper_bound(params.offset)};
+
+        if (alloc-- == allocation_map.begin() ||
+            (params.offset - alloc->first) + size > alloc->second.size) {
+            ASSERT_MSG(false, "Cannot perform a fixed mapping into an unallocated region!");
+            return NvResult::BadValue;
+        }
+
+        const bool use_big_pages = alloc->second.big_pages && big_page;
+        gmmu->Map(params.offset, cpu_address, size, use_big_pages);
+
+        auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true,
+                                               use_big_pages, alloc->second.sparse)};
+        alloc->second.mappings.push_back(mapping);
+        mapping_map[params.offset] = mapping;
    } else {
-        AddBufferMap(params.offset, size, physical_address, is_alloc);
+
+        auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
+        u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE};
+        u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
+
+        params.offset = static_cast<u64>(allocator.Allocate(
+                            static_cast<u32>(Common::AlignUp(size, page_size) >> page_size_bits)))
+                        << page_size_bits;
+        if (!params.offset) {
+            ASSERT_MSG(false, "Failed to allocate free space in the GPU AS!");
+            return NvResult::InsufficientMemory;
+        }
+
+        gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), big_page);
+
+        auto mapping{
+            std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
+        mapping_map[params.offset] = mapping;
    }

    std::memcpy(output.data(), &params, output.size());
-    return result;
+    return NvResult::Success;
 }

 NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -254,47 +426,82 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8

    LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);

-    if (const auto size{RemoveBufferMap(params.offset)}; size) {
-        system.GPU().MemoryManager().Unmap(params.offset, *size);
-    } else {
-        LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset);
+    std::scoped_lock lock(mutex);
+
+    if (!vm.initialised) {
+        return NvResult::BadValue;
+    }
+
+    try {
+        auto mapping{mapping_map.at(params.offset)}; // Throws when nothing is mapped at offset
+        // Same release sequence as FreeMappingLocked, inlined here
+        if (!mapping->fixed) {
+            auto& allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
+            u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
+
+            allocator.Free(static_cast<u32>(mapping->offset >> page_size_bits),
+                           static_cast<u32>(mapping->size >> page_size_bits));
+        }
+
+        // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
+        // Only FreeSpace can unmap them fully
+        if (mapping->sparse_alloc) {
+            gmmu->MapSparse(params.offset, mapping->size, mapping->big_page);
+        } else {
+            gmmu->Unmap(params.offset, mapping->size);
+        }
+
+        mapping_map.erase(params.offset);
+    } catch (const std::out_of_range&) { // Unknown offset: only warn, the ioctl still succeeds
+        LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset);
     }

-    std::memcpy(output.data(), &params, output.size());
    return NvResult::Success;
 }

 NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) {
    IoctlBindChannel params{};
    std::memcpy(&params, input.data(), input.size());
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}", params.fd);
+    LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);

-    channel = params.fd;
+    auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd);
+    gpu_channel_device->channel_state->memory_manager = gmmu;
    return NvResult::Success;
 }

+void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) {
+    params.buf_size = 2 * sizeof(VaRegion);
+    // Allocator bounds are page indices: widen to u64 before shifting so the byte offset can't wrap
+    params.regions = std::array<VaRegion, 2>{
+        VaRegion{
+            .offset = static_cast<u64>(vm.small_page_allocator->GetVAStart()) << VM::PAGE_SIZE_BITS,
+            .page_size = VM::YUZU_PAGESIZE,
+            ._pad0_{},
+            .pages = vm.small_page_allocator->GetVALimit() - vm.small_page_allocator->GetVAStart(),
+        },
+        VaRegion{
+            .offset = static_cast<u64>(vm.big_page_allocator->GetVAStart()) << vm.big_page_size_bits,
+            .page_size = vm.big_page_size,
+            ._pad0_{},
+            .pages = vm.big_page_allocator->GetVALimit() - vm.big_page_allocator->GetVAStart(),
+        },
+    };
+}
+
 NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) {
    IoctlGetVaRegions params{};
    std::memcpy(&params, input.data(), input.size());

-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
-                params.buf_size);
+    LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
+              params.buf_size);

-    params.buf_size = 0x30;
+    std::scoped_lock lock(mutex);

-    params.small = IoctlVaRegion{
-        .offset = 0x04000000,
-        .page_size = DEFAULT_SMALL_PAGE_SIZE,
-        .pages = 0x3fbfff,
-    };
+    if (!vm.initialised) {
+        return NvResult::BadValue;
+    }

-    params.big = IoctlVaRegion{
-        .offset = 0x04000000,
-        .page_size = big_page_size,
-        .pages = 0x1bffff,
-    };
-
-    // TODO(ogniK): This probably can stay stubbed but should add support way way later
+    GetVARegionsImpl(params);

    std::memcpy(output.data(), &params, output.size());
    return NvResult::Success;
@@ -305,62 +512,27 @@ NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u
    IoctlGetVaRegions params{};
    std::memcpy(&params, input.data(), input.size());

-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
-                params.buf_size);
+    LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
+              params.buf_size);

-    params.buf_size = 0x30;
+    std::scoped_lock lock(mutex);

-    params.small = IoctlVaRegion{
-        .offset = 0x04000000,
-        .page_size = 0x1000,
-        .pages = 0x3fbfff,
-    };
+    if (!vm.initialised) {
+        return NvResult::BadValue;
+    }

-    params.big = IoctlVaRegion{
-        .offset = 0x04000000,
-        .page_size = big_page_size,
-        .pages = 0x1bffff,
-    };
-
-    // TODO(ogniK): This probably can stay stubbed but should add support way way later
+    GetVARegionsImpl(params);

    std::memcpy(output.data(), &params, output.size());
-    std::memcpy(inline_output.data(), &params.small, sizeof(IoctlVaRegion));
-    std::memcpy(inline_output.data() + sizeof(IoctlVaRegion), &params.big, sizeof(IoctlVaRegion));
+    std::memcpy(inline_output.data(), &params.regions[0], sizeof(VaRegion));
+    std::memcpy(inline_output.data() + sizeof(VaRegion), &params.regions[1], sizeof(VaRegion));

    return NvResult::Success;
 }

-std::optional<nvhost_as_gpu::BufferMap> nvhost_as_gpu::FindBufferMap(GPUVAddr gpu_addr) const {
-    const auto end{buffer_mappings.upper_bound(gpu_addr)};
-    for (auto iter{buffer_mappings.begin()}; iter != end; ++iter) {
-        if (gpu_addr >= iter->second.StartAddr() && gpu_addr < iter->second.EndAddr()) {
-            return iter->second;
-        }
-    }
-
-    return std::nullopt;
-}
-
-void nvhost_as_gpu::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr,
-                                 bool is_allocated) {
-    buffer_mappings[gpu_addr] = {gpu_addr, size, cpu_addr, is_allocated};
-}
-
-std::optional<std::size_t> nvhost_as_gpu::RemoveBufferMap(GPUVAddr gpu_addr) {
-    if (const auto iter{buffer_mappings.find(gpu_addr)}; iter != buffer_mappings.end()) {
-        std::size_t size{};
-
-        if (iter->second.IsAllocated()) {
-            size = iter->second.Size();
-        }
-
-        buffer_mappings.erase(iter);
-
-        return size;
-    }
-
-    return std::nullopt;
+Kernel::KEvent* nvhost_as_gpu::QueryEvent(u32 event_id) { // No event id is handled by this device
+    LOG_CRITICAL(Service_NVDRV, "Unknown AS GPU Event {}", event_id);
+    return nullptr; // Every query is logged and answered with a null event
 }

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -1,35 +1,50 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later

 #pragma once

+#include <bit>
+#include <list>
 #include <map>
 #include <memory>
+#include <mutex>
 #include <optional>
 #include <vector>

+#include "common/address_space.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
+#include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"

+namespace Tegra {
+class MemoryManager;
+} // namespace Tegra
+
+namespace Service::Nvidia {
+class Module;
+}
+
+namespace Service::Nvidia::NvCore {
+class Container;
+class NvMap;
+} // namespace Service::Nvidia::NvCore
+
 namespace Service::Nvidia::Devices {

-constexpr u32 DEFAULT_BIG_PAGE_SIZE = 1 << 16;
-constexpr u32 DEFAULT_SMALL_PAGE_SIZE = 1 << 12;
-
-class nvmap;
-
-enum class AddressSpaceFlags : u32 {
-    None = 0x0,
-    FixedOffset = 0x1,
-    Remap = 0x100,
+enum class MappingFlags : u32 {
+    None = 0,
+    Fixed = 1 << 0,
+    Sparse = 1 << 1,
+    Remap = 1 << 8,
 };
-DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags);
+DECLARE_ENUM_FLAG_OPERATORS(MappingFlags);

 class nvhost_as_gpu final : public nvdevice {
 public:
-    explicit nvhost_as_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_);
+    explicit nvhost_as_gpu(Core::System& system_, Module& module, NvCore::Container& core);
    ~nvhost_as_gpu() override;

    NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -42,46 +57,17 @@ public:
    void OnOpen(DeviceFD fd) override;
    void OnClose(DeviceFD fd) override;

-private:
-    class BufferMap final {
-    public:
-        constexpr BufferMap() = default;
+    Kernel::KEvent* QueryEvent(u32 event_id) override;

-        constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_)
-            : start_addr{start_addr_}, end_addr{start_addr_ + size_} {}
-
-        constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_, VAddr cpu_addr_,
-                            bool is_allocated_)
-            : start_addr{start_addr_}, end_addr{start_addr_ + size_}, cpu_addr{cpu_addr_},
-              is_allocated{is_allocated_} {}
-
-        constexpr VAddr StartAddr() const {
-            return start_addr;
-        }
-
-        constexpr VAddr EndAddr() const {
-            return end_addr;
-        }
-
-        constexpr std::size_t Size() const {
-            return end_addr - start_addr;
-        }
-
-        constexpr VAddr CpuAddr() const {
-            return cpu_addr;
-        }
-
-        constexpr bool IsAllocated() const {
-            return is_allocated;
-        }
-
-    private:
-        GPUVAddr start_addr{};
-        GPUVAddr end_addr{};
-        VAddr cpu_addr{};
-        bool is_allocated{};
+    struct VaRegion {
+        u64 offset;
+        u32 page_size;
+        u32 _pad0_;
+        u64 pages;
    };
+    static_assert(sizeof(VaRegion) == 0x18);

+private:
    struct IoctlAllocAsEx {
        u32_le flags{}; // usually passes 1
        s32_le as_fd{}; // ignored; passes 0
@@ -96,7 +82,7 @@ private:
    struct IoctlAllocSpace {
        u32_le pages{};
        u32_le page_size{};
-        AddressSpaceFlags flags{};
+        MappingFlags flags{};
        INSERT_PADDING_WORDS(1);
        union {
            u64_le offset;
@@ -113,19 +99,19 @@ private:
    static_assert(sizeof(IoctlFreeSpace) == 16, "IoctlFreeSpace is incorrect size");

    struct IoctlRemapEntry {
-        u16_le flags{};
-        u16_le kind{};
-        u32_le nvmap_handle{};
-        u32_le map_offset{};
-        u32_le offset{};
-        u32_le pages{};
+        u16 flags;
+        u16 kind;
+        NvCore::NvMap::Handle::Id handle;
+        u32 handle_offset_big_pages;
+        u32 as_offset_big_pages;
+        u32 big_pages;
    };
    static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size");

    struct IoctlMapBufferEx {
-        AddressSpaceFlags flags{}; // bit0: fixed_offset, bit2: cacheable
-        u32_le kind{};             // -1 is default
-        u32_le nvmap_handle{};
+        MappingFlags flags{}; // bit0: fixed_offset, bit2: cacheable
+        u32_le kind{};        // -1 is default
+        NvCore::NvMap::Handle::Id handle;
        u32_le page_size{}; // 0 means don't care
        s64_le buffer_offset{};
        u64_le mapping_size{};
@@ -143,27 +129,15 @@ private:
    };
    static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size");

-    struct IoctlVaRegion {
-        u64_le offset{};
-        u32_le page_size{};
-        INSERT_PADDING_WORDS(1);
-        u64_le pages{};
-    };
-    static_assert(sizeof(IoctlVaRegion) == 24, "IoctlVaRegion is incorrect size");
-
    struct IoctlGetVaRegions {
        u64_le buf_addr{}; // (contained output user ptr on linux, ignored)
        u32_le buf_size{}; // forced to 2*sizeof(struct va_region)
        u32_le reserved{};
-        IoctlVaRegion small{};
-        IoctlVaRegion big{};
+        std::array<VaRegion, 2> regions{};
    };
-    static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2,
+    static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2,
                  "IoctlGetVaRegions is incorrect size");

-    s32 channel{};
-    u32 big_page_size{DEFAULT_BIG_PAGE_SIZE};
-
    NvResult AllocAsEx(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult Remap(const std::vector<u8>& input, std::vector<u8>& output);
@@ -172,18 +146,75 @@ private:
    NvResult FreeSpace(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult BindChannel(const std::vector<u8>& input, std::vector<u8>& output);

+    void GetVARegionsImpl(IoctlGetVaRegions& params);
    NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output,
                          std::vector<u8>& inline_output);

-    std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const;
-    void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
-    std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
+    void FreeMappingLocked(u64 offset);

-    std::shared_ptr<nvmap> nvmap_dev;
+    Module& module;

-    // This is expected to be ordered, therefore we must use a map, not unordered_map
-    std::map<GPUVAddr, BufferMap> buffer_mappings;
+    NvCore::Container& container;
+    NvCore::NvMap& nvmap;
+
+    struct Mapping { // One buffer mapped by MapBufferEx; stored in mapping_map by GPU address
+        VAddr ptr; // CPU address the mapping starts at
+        u64 offset; // GPU virtual address of the mapping
+        u64 size; // Requested size in bytes, not rounded up to the page size
+        bool fixed; // True when mapped at a caller-chosen address inside an AllocateSpace region
+        bool big_page; // Mapped with big pages; MapBufferEx also sets this for fixed mappings
+        bool sparse_alloc; // True when the containing allocation was created sparse
+
+        Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_)
+            : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_),
+              sparse_alloc(sparse_alloc_) {}
+    };
+
+    struct Allocation {
+        u64 size;
+        std::list<std::shared_ptr<Mapping>> mappings;
+        u32 page_size;
+        bool sparse;
+        bool big_pages;
+    };
+
+    std::map<u64, std::shared_ptr<Mapping>>
+        mapping_map; //!< This maps the base addresses of mapped buffers to their total sizes and
+                     //!< mapping type, this is needed as what was originally a single buffer may
+                     //!< have been split into multiple GPU side buffers with the remap flag.
+    std::map<u64, Allocation> allocation_map; //!< Holds allocations created by AllocSpace from
+                                              //!< which fixed buffers can be mapped into
+    std::mutex mutex;                         //!< Locks all AS operations
+
+    struct VM { // Address-space layout and page allocators, configured by AllocAsEx
+        static constexpr u32 YUZU_PAGESIZE{0x1000}; // Small page size in bytes
+        static constexpr u32 PAGE_SIZE_BITS{std::countr_zero(YUZU_PAGESIZE)};
+
+        static constexpr u32 SUPPORTED_BIG_PAGE_SIZES{0x30000}; // Bitmask: 0x10000 | 0x20000
+        static constexpr u32 DEFAULT_BIG_PAGE_SIZE{0x20000};
+        u32 big_page_size{DEFAULT_BIG_PAGE_SIZE};
+        u32 big_page_size_bits{std::countr_zero(DEFAULT_BIG_PAGE_SIZE)};
+
+        static constexpr u32 VA_START_SHIFT{10}; // va_range_start = big_page_size << this shift
+        static constexpr u64 DEFAULT_VA_SPLIT{1ULL << 34};
+        static constexpr u64 DEFAULT_VA_RANGE{1ULL << 37};
+        u64 va_range_start{DEFAULT_BIG_PAGE_SIZE << VA_START_SHIFT};
+        u64 va_range_split{DEFAULT_VA_SPLIT}; // Small pages end here, big pages start here
+        u64 va_range_end{DEFAULT_VA_RANGE};
+
+        using Allocator = Common::FlatAllocator<u32, 0, 32>; // Works in page indices, not bytes
+
+        std::unique_ptr<Allocator> big_page_allocator;
+        std::shared_ptr<Allocator>
+            small_page_allocator; //!< Shared as this is also used by nvhost::GpuChannel
+
+        bool initialised{}; // Set by AllocAsEx; the other ioctls return BadValue until then
+    } vm;
+    std::shared_ptr<Tegra::MemoryManager> gmmu;
+
+    // s32 channel{};
+    // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
 };

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -1,24 +1,39 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later

+#include <bit>
 #include <cstdlib>
 #include <cstring>

+#include <fmt/format.h>
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/hle/kernel/k_event.h"
 #include "core/hle/kernel/k_writable_event.h"
+#include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
 #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/host1x.h"

 namespace Service::Nvidia::Devices {

 nvhost_ctrl::nvhost_ctrl(Core::System& system_, EventInterface& events_interface_,
-                         SyncpointManager& syncpoint_manager_)
-    : nvdevice{system_}, events_interface{events_interface_}, syncpoint_manager{
-                                                                  syncpoint_manager_} {}
-nvhost_ctrl::~nvhost_ctrl() = default;
+                         NvCore::Container& core_)
+    : nvdevice{system_}, events_interface{events_interface_}, core{core_},
+      syncpoint_manager{core_.GetSyncpointManager()} {}
+
+nvhost_ctrl::~nvhost_ctrl() {
+    for (auto& event : events) {
+        if (!event.registered) {
+            continue;
+        }
+        events_interface.FreeEvent(event.kevent);
+    }
+}

 NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
                             std::vector<u8>& output) {
@@ -30,13 +45,15 @@ NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>&
        case 0x1c:
            return IocCtrlClearEventWait(input, output);
        case 0x1d:
-            return IocCtrlEventWait(input, output, false);
-        case 0x1e:
            return IocCtrlEventWait(input, output, true);
+        case 0x1e:
+            return IocCtrlEventWait(input, output, false);
        case 0x1f:
            return IocCtrlEventRegister(input, output);
        case 0x20:
            return IocCtrlEventUnregister(input, output);
+        case 0x21:
+            return IocCtrlEventUnregisterBatch(input, output);
        }
        break;
    default:
@@ -60,6 +77,7 @@ NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>&
 }

 void nvhost_ctrl::OnOpen(DeviceFD fd) {}
+
 void nvhost_ctrl::OnClose(DeviceFD fd) {}

 NvResult nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -71,116 +89,167 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector
 }

 NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output,
-                                       bool is_async) {
+                                       bool is_allocation) {
    IocCtrlEventWaitParams params{};
    std::memcpy(&params, input.data(), sizeof(params));
-    LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}",
-              params.syncpt_id, params.threshold, params.timeout, is_async);
+    LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_allocation={}",
+              params.fence.id, params.fence.value, params.timeout, is_allocation);

-    if (params.syncpt_id >= MaxSyncPoints) {
+    bool must_unmark_fail = !is_allocation;
+    const u32 event_id = params.value.raw; // guest-supplied; not range-checked yet
+    SCOPE_EXIT({
+        std::memcpy(output.data(), &params, sizeof(params));
+        if (must_unmark_fail && event_id < MaxNvEvents) {
+            events[event_id].fails = 0;
+        }
+    });
+
+    const u32 fence_id = static_cast<u32>(params.fence.id);
+
+    if (fence_id >= MaxSyncPoints) {
        return NvResult::BadParameter;
    }

-    u32 event_id = params.value & 0x00FF;
+    if (params.fence.value == 0) {
+        if (!syncpoint_manager.IsSyncpointAllocated(params.fence.id)) {
+            LOG_WARNING(Service_NVDRV,
+                        "Unallocated syncpt_id={}, threshold={}, timeout={}, is_allocation={}",
+                        params.fence.id, params.fence.value, params.timeout, is_allocation);
+        } else {
+            params.value.raw = syncpoint_manager.ReadSyncpointMinValue(fence_id);
+        }
+        return NvResult::Success;
+    }

-    if (event_id >= MaxNvEvents) {
-        std::memcpy(output.data(), &params, sizeof(params));
+    if (syncpoint_manager.IsFenceSignalled(params.fence)) {
+        params.value.raw = syncpoint_manager.ReadSyncpointMinValue(fence_id);
+        return NvResult::Success;
+    }
+
+    if (const auto new_value = syncpoint_manager.UpdateMin(fence_id);
+        syncpoint_manager.IsFenceSignalled(params.fence)) {
+        params.value.raw = new_value;
+        return NvResult::Success;
+    }
+
+    auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager();
+    const u32 target_value = params.fence.value;
+
+    auto lock = NvEventsLock();
+
+    u32 slot = [&]() {
+        if (is_allocation) {
+            params.value.raw = 0;
+            return FindFreeNvEvent(fence_id);
+        } else {
+            return params.value.raw;
+        }
+    }();
+
+    must_unmark_fail = false;
+
+    const auto check_failing = [&]() {
+        if (events[slot].fails > 2) {
+            {
+                auto lk = system.StallProcesses();
+                host1x_syncpoint_manager.WaitHost(fence_id, target_value);
+                system.UnstallProcesses();
+            }
+            params.value.raw = target_value;
+            return true;
+        }
+        return false;
+    };
+
+    if (slot >= MaxNvEvents) {
        return NvResult::BadParameter;
    }

-    if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
-        params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id);
-        std::memcpy(output.data(), &params, sizeof(params));
-        events_interface.failed[event_id] = false;
-        return NvResult::Success;
-    }
-
-    if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id);
-        syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
-        params.value = new_value;
-        std::memcpy(output.data(), &params, sizeof(params));
-        events_interface.failed[event_id] = false;
-        return NvResult::Success;
-    }
-
-    auto& event = events_interface.events[event_id];
-    auto& gpu = system.GPU();
-
-    // This is mostly to take into account unimplemented features. As synced
-    // gpu is always synced.
-    if (!gpu.IsAsync()) {
-        event.event->GetWritableEvent().Signal();
-        return NvResult::Success;
-    }
-    const u32 current_syncpoint_value = event.fence.value;
-    const s32 diff = current_syncpoint_value - params.threshold;
-    if (diff >= 0) {
-        event.event->GetWritableEvent().Signal();
-        params.value = current_syncpoint_value;
-        std::memcpy(output.data(), &params, sizeof(params));
-        events_interface.failed[event_id] = false;
-        return NvResult::Success;
-    }
-    const u32 target_value = current_syncpoint_value - diff;
-
-    if (!is_async) {
-        params.value = 0;
-    }
-
    if (params.timeout == 0) {
-        std::memcpy(output.data(), &params, sizeof(params));
+        if (check_failing()) {
+            events[slot].fails = 0;
+            return NvResult::Success;
+        }
        return NvResult::Timeout;
    }

-    EventState status = events_interface.status[event_id];
-    const bool bad_parameter = status == EventState::Busy;
-    if (bad_parameter) {
-        std::memcpy(output.data(), &params, sizeof(params));
+    auto& event = events[slot];
+
+    if (!event.registered) {
        return NvResult::BadParameter;
    }
-    events_interface.SetEventStatus(event_id, EventState::Waiting);
-    events_interface.assigned_syncpt[event_id] = params.syncpt_id;
-    events_interface.assigned_value[event_id] = target_value;
-    if (is_async) {
-        params.value = params.syncpt_id << 4;
-    } else {
-        params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
+
+    if (event.IsBeingUsed()) {
+        return NvResult::BadParameter;
    }
-    params.value |= event_id;
-    event.event->GetWritableEvent().Clear();
-    if (events_interface.failed[event_id]) {
-        {
-            auto lk = system.StallProcesses();
-            gpu.WaitFence(params.syncpt_id, target_value);
-            system.UnstallProcesses();
-        }
-        std::memcpy(output.data(), &params, sizeof(params));
-        events_interface.failed[event_id] = false;
+
+    if (check_failing()) {
+        event.fails = 0;
        return NvResult::Success;
    }
-    gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
-    std::memcpy(output.data(), &params, sizeof(params));
+
+    params.value.raw = 0;
+
+    event.status.store(EventState::Waiting, std::memory_order_release);
+    event.assigned_syncpt = fence_id;
+    event.assigned_value = target_value;
+    if (is_allocation) {
+        params.value.syncpoint_id_for_allocation.Assign(static_cast<u16>(fence_id));
+        params.value.event_allocated.Assign(1);
+    } else {
+        params.value.syncpoint_id.Assign(fence_id);
+    }
+    params.value.raw |= slot;
+
+    event.wait_handle =
+        host1x_syncpoint_manager.RegisterHostAction(fence_id, target_value, [this, slot]() {
+            auto& event_ = events[slot];
+            if (event_.status.exchange(EventState::Signalling, std::memory_order_acq_rel) ==
+                EventState::Waiting) {
+                event_.kevent->GetWritableEvent().Signal();
+            }
+            event_.status.store(EventState::Signalled, std::memory_order_release);
+        });
    return NvResult::Timeout;
 }

+NvResult nvhost_ctrl::FreeEvent(u32 slot) {
+    if (slot >= MaxNvEvents) {
+        return NvResult::BadParameter;
+    }
+
+    auto& event = events[slot];
+
+    if (!event.registered) {
+        return NvResult::Success;
+    }
+
+    if (event.IsBeingUsed()) {
+        return NvResult::Busy;
+    }
+
+    FreeNvEvent(slot);
+    return NvResult::Success;
+}
+
 NvResult nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) {
    IocCtrlEventRegisterParams params{};
    std::memcpy(&params, input.data(), sizeof(params));
-    const u32 event_id = params.user_event_id & 0x00FF;
+    const u32 event_id = params.user_event_id;
    LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id);
    if (event_id >= MaxNvEvents) {
        return NvResult::BadParameter;
    }
-    if (events_interface.registered[event_id]) {
-        const auto event_state = events_interface.status[event_id];
-        if (event_state != EventState::Free) {
-            LOG_WARNING(Service_NVDRV, "Event already registered! Unregistering previous event");
-            events_interface.UnregisterEvent(event_id);
-        } else {
-            return NvResult::BadParameter;
+
+    auto lock = NvEventsLock();
+
+    if (events[event_id].registered) {
+        const auto result = FreeEvent(event_id);
+        if (result != NvResult::Success) {
+            return result;
        }
    }
-    events_interface.RegisterEvent(event_id);
+    CreateNvEvent(event_id);
    return NvResult::Success;
 }

@@ -190,34 +259,145 @@ NvResult nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input,
    std::memcpy(&params, input.data(), sizeof(params));
    const u32 event_id = params.user_event_id & 0x00FF;
    LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id);
-    if (event_id >= MaxNvEvents) {
-        return NvResult::BadParameter;
+
+    auto lock = NvEventsLock();
+    return FreeEvent(event_id);
+}
+
+NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(const std::vector<u8>& input,
+                                                  std::vector<u8>& output) {
+    IocCtrlEventUnregisterBatchParams params{};
+    std::memcpy(&params, input.data(), sizeof(params));
+    u64 event_mask = params.user_events;
+    LOG_DEBUG(Service_NVDRV, " called, event_mask: {:X}", event_mask);
+
+    auto lock = NvEventsLock();
+    while (event_mask != 0) {
+        const u64 event_id = std::countr_zero(event_mask);
+        event_mask &= ~(1ULL << event_id);
+        const auto result = FreeEvent(static_cast<u32>(event_id));
+        if (result != NvResult::Success) {
+            return result;
+        }
    }
-    if (!events_interface.registered[event_id]) {
-        return NvResult::BadParameter;
-    }
-    events_interface.UnregisterEvent(event_id);
    return NvResult::Success;
 }

 NvResult nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
-    IocCtrlEventSignalParams params{};
+    IocCtrlEventClearParams params{};
    std::memcpy(&params, input.data(), sizeof(params));

-    u32 event_id = params.event_id & 0x00FF;
-    LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id);
+    u32 event_id = params.event_id.slot;
+    LOG_DEBUG(Service_NVDRV, "called, event_id: {:X}", event_id);

    if (event_id >= MaxNvEvents) {
        return NvResult::BadParameter;
    }
-    if (events_interface.status[event_id] == EventState::Waiting) {
-        events_interface.LiberateEvent(event_id);
-    }
-    events_interface.failed[event_id] = true;

-    syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id);
+    auto lock = NvEventsLock();
+
+    auto& event = events[event_id];
+    if (event.status.exchange(EventState::Cancelling, std::memory_order_acq_rel) ==
+        EventState::Waiting) {
+        auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager();
+        host1x_syncpoint_manager.DeregisterHostAction(event.assigned_syncpt, event.wait_handle);
+        syncpoint_manager.UpdateMin(event.assigned_syncpt);
+        event.wait_handle = {};
+    }
+    event.fails++;
+    event.status.store(EventState::Cancelled, std::memory_order_release);
+    // A slot that was never registered (or was freed) has no kernel event to clear.
+    if (event.kevent != nullptr) {
+        event.kevent->GetWritableEvent().Clear();
+    }

    return NvResult::Success;
 }

+Kernel::KEvent* nvhost_ctrl::QueryEvent(u32 event_id) {
+    const auto desired_event = SyncpointEventValue{.raw = event_id};
+
+    const bool allocated = desired_event.event_allocated.Value() != 0;
+    const u32 slot{allocated ? desired_event.partial_slot.Value()
+                             : static_cast<u32>(desired_event.slot)};
+    if (slot >= MaxNvEvents) {
+        ASSERT(false);
+        return nullptr;
+    }
+
+    const u32 syncpoint_id{allocated ? desired_event.syncpoint_id_for_allocation.Value()
+                                     : desired_event.syncpoint_id.Value()};
+
+    auto lock = NvEventsLock();
+
+    auto& event = events[slot];
+    if (event.registered && event.assigned_syncpt == syncpoint_id) {
+        ASSERT(event.kevent);
+        return event.kevent;
+    }
+    // Is this possible in hardware?
+    ASSERT_MSG(false, "Slot:{}, SyncpointID:{}, requested", slot, syncpoint_id);
+    return nullptr;
+}
+
+std::unique_lock<std::mutex> nvhost_ctrl::NvEventsLock() {
+    return std::unique_lock<std::mutex>(events_mutex);
+}
+
+void nvhost_ctrl::CreateNvEvent(u32 event_id) {
+    auto& event = events[event_id];
+    ASSERT(!event.kevent);
+    ASSERT(!event.registered);
+    ASSERT(!event.IsBeingUsed());
+    event.kevent = events_interface.CreateEvent(fmt::format("NVCTRL::NvEvent_{}", event_id));
+    event.status = EventState::Available;
+    event.registered = true;
+    const u64 mask = 1ULL << event_id;
+    event.fails = 0;
+    events_mask |= mask;
+    event.assigned_syncpt = 0;
+}
+
+void nvhost_ctrl::FreeNvEvent(u32 event_id) {
+    auto& event = events[event_id];
+    ASSERT(event.kevent);
+    ASSERT(event.registered);
+    ASSERT(!event.IsBeingUsed());
+    events_interface.FreeEvent(event.kevent);
+    event.kevent = nullptr;
+    event.status = EventState::Available;
+    event.registered = false;
+    const u64 mask = ~(1ULL << event_id);
+    events_mask &= mask;
+}
+
+u32 nvhost_ctrl::FindFreeNvEvent(u32 syncpoint_id) {
+    u32 slot{MaxNvEvents};
+    u32 free_slot{MaxNvEvents};
+    for (u32 i = 0; i < MaxNvEvents; i++) {
+        auto& event = events[i];
+        if (event.registered) {
+            if (!event.IsBeingUsed()) {
+                slot = i;
+                if (event.assigned_syncpt == syncpoint_id) {
+                    return slot;
+                }
+            }
+        } else if (free_slot == MaxNvEvents) {
+            free_slot = i;
+        }
+    }
+    if (free_slot < MaxNvEvents) {
+        CreateNvEvent(free_slot);
+        return free_slot;
+    }
+
+    if (slot < MaxNvEvents) {
+        return slot;
+    }
+
+    LOG_CRITICAL(Service_NVDRV, "Failed to allocate an event");
+    return MaxNvEvents; // out-of-range sentinel so the caller's bounds check rejects it
+}
+
 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -1,20 +1,28 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later

 #pragma once

 #include <array>
 #include <vector>
+#include "common/bit_field.h"
 #include "common/common_types.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
+#include "video_core/host1x/syncpoint_manager.h"
+
+namespace Service::Nvidia::NvCore {
+class Container;
+class SyncpointManager;
+} // namespace Service::Nvidia::NvCore

 namespace Service::Nvidia::Devices {

 class nvhost_ctrl final : public nvdevice {
 public:
    explicit nvhost_ctrl(Core::System& system_, EventInterface& events_interface_,
-                         SyncpointManager& syncpoint_manager_);
+                         NvCore::Container& core);
    ~nvhost_ctrl() override;

    NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -27,7 +35,70 @@ public:
    void OnOpen(DeviceFD fd) override;
    void OnClose(DeviceFD fd) override;

+    Kernel::KEvent* QueryEvent(u32 event_id) override;
+
+    union SyncpointEventValue {
+        u32 raw;
+
+        union {
+            BitField<0, 4, u32> partial_slot;
+            BitField<4, 28, u32> syncpoint_id;
+        };
+
+        struct {
+            u16 slot;
+            union {
+                BitField<0, 12, u16> syncpoint_id_for_allocation;
+                BitField<12, 1, u16> event_allocated;
+            };
+        };
+    };
+    static_assert(sizeof(SyncpointEventValue) == sizeof(u32));
+
 private:
+    struct InternalEvent {
+        // Mask representing registered events
+
+        // Each kernel event associated to an NV event
+        Kernel::KEvent* kevent{};
+        // The status of the current NVEvent
+        std::atomic<EventState> status{};
+
+        // Counts waits on this NVEvent that were cancelled through IocCtrlClearEventWait.
+        u32 fails{};
+        // When an NVEvent is waiting on GPU interrupt, this is the sync_point
+        // associated with it.
+        u32 assigned_syncpt{};
+        // This is the value of the GPU interrupt for which the NVEvent is waiting
+        // for.
+        u32 assigned_value{};
+
+        // Tells if an NVEvent is registered or not
+        bool registered{};
+
+        // Used for waiting on a syncpoint & canceling it.
+        Tegra::Host1x::SyncpointManager::ActionHandle wait_handle{};
+
+        bool IsBeingUsed() const {
+            const auto current_status = status.load(std::memory_order_acquire);
+            return current_status == EventState::Waiting ||
+                   current_status == EventState::Cancelling ||
+                   current_status == EventState::Signalling;
+        }
+    };
+
+    std::unique_lock<std::mutex> NvEventsLock();
+
+    void CreateNvEvent(u32 event_id);
+
+    void FreeNvEvent(u32 event_id);
+
+    u32 FindFreeNvEvent(u32 syncpoint_id);
+
+    std::array<InternalEvent, MaxNvEvents> events{};
+    std::mutex events_mutex;
+    u64 events_mask{};
+
    struct IocSyncptReadParams {
        u32_le id{};
        u32_le value{};
@@ -83,27 +154,18 @@ private:
    };
    static_assert(sizeof(IocGetConfigParams) == 387, "IocGetConfigParams is incorrect size");

-    struct IocCtrlEventSignalParams {
-        u32_le event_id{};
+    struct IocCtrlEventClearParams {
+        SyncpointEventValue event_id{};
    };
-    static_assert(sizeof(IocCtrlEventSignalParams) == 4,
-                  "IocCtrlEventSignalParams is incorrect size");
+    static_assert(sizeof(IocCtrlEventClearParams) == 4,
+                  "IocCtrlEventClearParams is incorrect size");

    struct IocCtrlEventWaitParams {
-        u32_le syncpt_id{};
-        u32_le threshold{};
-        s32_le timeout{};
-        u32_le value{};
-    };
-    static_assert(sizeof(IocCtrlEventWaitParams) == 16, "IocCtrlEventWaitParams is incorrect size");
-
-    struct IocCtrlEventWaitAsyncParams {
-        u32_le syncpt_id{};
-        u32_le threshold{};
+        NvFence fence{};
        u32_le timeout{};
-        u32_le value{};
+        SyncpointEventValue value{};
    };
-    static_assert(sizeof(IocCtrlEventWaitAsyncParams) == 16,
+    static_assert(sizeof(IocCtrlEventWaitParams) == 16,
                  "IocCtrlEventWaitAsyncParams is incorrect size");

    struct IocCtrlEventRegisterParams {
@@ -118,19 +180,25 @@ private:
    static_assert(sizeof(IocCtrlEventUnregisterParams) == 4,
                  "IocCtrlEventUnregisterParams is incorrect size");

-    struct IocCtrlEventKill {
+    struct IocCtrlEventUnregisterBatchParams {
        u64_le user_events{};
    };
-    static_assert(sizeof(IocCtrlEventKill) == 8, "IocCtrlEventKill is incorrect size");
+    static_assert(sizeof(IocCtrlEventUnregisterBatchParams) == 8,
+                  "IocCtrlEventUnregisterBatchParams is incorrect size");

    NvResult NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output);
-    NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async);
+    NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output,
+                              bool is_allocation);
    NvResult IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);
+    NvResult IocCtrlEventUnregisterBatch(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);

+    NvResult FreeEvent(u32 slot);
+
    EventInterface& events_interface;
-    SyncpointManager& syncpoint_manager;
+    NvCore::Container& core;
+    NvCore::SyncpointManager& syncpoint_manager;
 };

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -7,11 +7,19 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
+#include "core/hle/service/nvdrv/nvdrv.h"

 namespace Service::Nvidia::Devices {

-nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system_) : nvdevice{system_} {}
-nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default;
+nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system_, EventInterface& events_interface_)
+    : nvdevice{system_}, events_interface{events_interface_} {
+    error_notifier_event = events_interface.CreateEvent("CtrlGpuErrorNotifier");
+    unknown_event = events_interface.CreateEvent("CtrlGpuUknownEvent");
+}
+nvhost_ctrl_gpu::~nvhost_ctrl_gpu() {
+    events_interface.FreeEvent(error_notifier_event);
+    events_interface.FreeEvent(unknown_event);
+}

 NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
                                 std::vector<u8>& output) {
@@ -286,4 +294,17 @@ NvResult nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u
    return NvResult::Success;
 }

+Kernel::KEvent* nvhost_ctrl_gpu::QueryEvent(u32 event_id) {
+    switch (event_id) {
+    case 1:
+        return error_notifier_event;
+    case 2:
+        return unknown_event;
+    default: {
+        LOG_CRITICAL(Service_NVDRV, "Unknown Ctrl GPU Event {}", event_id);
+    }
+    }
+    return nullptr;
+}
+
 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -10,11 +10,15 @@
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"

+namespace Service::Nvidia {
+class EventInterface;
+}
+
 namespace Service::Nvidia::Devices {

 class nvhost_ctrl_gpu final : public nvdevice {
 public:
-    explicit nvhost_ctrl_gpu(Core::System& system_);
+    explicit nvhost_ctrl_gpu(Core::System& system_, EventInterface& events_interface_);
    ~nvhost_ctrl_gpu() override;

    NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -27,6 +31,8 @@ public:
    void OnOpen(DeviceFD fd) override;
    void OnClose(DeviceFD fd) override;

+    Kernel::KEvent* QueryEvent(u32 event_id) override;
+
 private:
    struct IoctlGpuCharacteristics {
        u32_le arch;                       // 0x120 (NVGPU_GPU_ARCH_GM200)
@@ -160,6 +166,12 @@ private:
    NvResult ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult FlushL2(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult GetGpuTime(const std::vector<u8>& input, std::vector<u8>& output);
+
+    EventInterface& events_interface;
+
+    // Events
+    Kernel::KEvent* error_notifier_event;
+    Kernel::KEvent* unknown_event;
 };

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -5,29 +5,46 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/core/nvmap.h"
+#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
-#include "core/hle/service/nvdrv/syncpoint_manager.h"
+#include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/memory.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/engines/puller.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/host1x.h"

 namespace Service::Nvidia::Devices {
 namespace {
-Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoint_id) {
-    Tegra::GPU::FenceAction result{};
+Tegra::CommandHeader BuildFenceAction(Tegra::Engines::Puller::FenceOperation op, u32 syncpoint_id) {
+    Tegra::Engines::Puller::FenceAction result{};
    result.op.Assign(op);
    result.syncpoint_id.Assign(syncpoint_id);
    return {result.raw};
 }
 } // namespace

-nvhost_gpu::nvhost_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
-                       SyncpointManager& syncpoint_manager_)
-    : nvdevice{system_}, nvmap_dev{std::move(nvmap_dev_)}, syncpoint_manager{syncpoint_manager_} {
-    channel_fence.id = syncpoint_manager_.AllocateSyncpoint();
-    channel_fence.value = system_.GPU().GetSyncpointValue(channel_fence.id);
+nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_,
+                       NvCore::Container& core_)
+    : nvdevice{system_}, events_interface{events_interface_}, core{core_},
+      syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
+      channel_state{system.GPU().AllocateChannel()} {
+    channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false);
+    sm_exception_breakpoint_int_report_event =
+        events_interface.CreateEvent("GpuChannelSMExceptionBreakpointInt");
+    sm_exception_breakpoint_pause_report_event =
+        events_interface.CreateEvent("GpuChannelSMExceptionBreakpointPause");
+    error_notifier_event = events_interface.CreateEvent("GpuChannelErrorNotifier");
 }

-nvhost_gpu::~nvhost_gpu() = default;
+nvhost_gpu::~nvhost_gpu() {
+    events_interface.FreeEvent(sm_exception_breakpoint_int_report_event);
+    events_interface.FreeEvent(sm_exception_breakpoint_pause_report_event);
+    events_interface.FreeEvent(error_notifier_event);
+    syncpoint_manager.FreeSyncpoint(channel_syncpoint);
+}

 NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
                            std::vector<u8>& output) {
@@ -167,9 +184,14 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8
                params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
                params.unk3);

-    channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
+    if (channel_state->initialized) {
+        LOG_CRITICAL(Service_NVDRV, "Already allocated!");
+        return NvResult::AlreadyAllocated;
+    }

-    params.fence_out = channel_fence;
+    system.GPU().InitChannel(*channel_state);
+
+    params.fence_out = syncpoint_manager.GetSyncpointFence(channel_syncpoint);

    std::memcpy(output.data(), &params, output.size());
    return NvResult::Success;
@@ -188,39 +210,37 @@ NvResult nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::ve

 static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
    return {
-        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
                                  Tegra::SubmissionMode::Increasing),
        {fence.value},
-        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
                                  Tegra::SubmissionMode::Increasing),
-        BuildFenceAction(Tegra::GPU::FenceOperation::Acquire, fence.id),
+        BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id),
    };
 }

-static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence,
-                                                                   u32 add_increment) {
+static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence) {
    std::vector<Tegra::CommandHeader> result{
-        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
                                  Tegra::SubmissionMode::Increasing),
        {}};

-    for (u32 count = 0; count < add_increment; ++count) {
-        result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+    for (u32 count = 0; count < 2; ++count) {
+        result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
                                                      Tegra::SubmissionMode::Increasing));
-        result.emplace_back(BuildFenceAction(Tegra::GPU::FenceOperation::Increment, fence.id));
+        result.emplace_back(
+            BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
    }

    return result;
 }

-static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence,
-                                                                          u32 add_increment) {
+static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence) {
    std::vector<Tegra::CommandHeader> result{
-        Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1,
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1,
                                  Tegra::SubmissionMode::Increasing),
        {}};
-    const std::vector<Tegra::CommandHeader> increment{
-        BuildIncrementCommandList(fence, add_increment)};
+    const std::vector<Tegra::CommandHeader> increment{BuildIncrementCommandList(fence)};

    result.insert(result.end(), increment.begin(), increment.end());

@@ -234,33 +254,41 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>

    auto& gpu = system.GPU();

-    params.fence_out.id = channel_fence.id;
+    std::scoped_lock lock(channel_mutex);

-    if (params.flags.add_wait.Value() &&
-        !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
-        gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
-    }
+    const auto bind_id = channel_state->bind_id;

-    if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
-        const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0;
-        params.fence_out.value = syncpoint_manager.IncreaseSyncpoint(
-            params.fence_out.id, params.AddIncrementValue() + increment_value);
-    } else {
-        params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
-    }
+    auto& flags = params.flags;

-    gpu.PushGPUEntries(std::move(entries));
+    if (flags.fence_wait.Value()) {
+        if (flags.increment_value.Value()) {
+            return NvResult::BadParameter;
+        }

-    if (params.flags.add_increment.Value()) {
-        if (params.flags.suppress_wfi) {
-            gpu.PushGPUEntries(Tegra::CommandList{
-                BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
-        } else {
-            gpu.PushGPUEntries(Tegra::CommandList{
-                BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
+        if (!syncpoint_manager.IsFenceSignalled(params.fence)) {
+            gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildWaitCommandList(params.fence)});
        }
    }

+    params.fence.id = channel_syncpoint;
+
+    u32 increment{(flags.fence_increment.Value() != 0 ? 2 : 0) +
+                  (flags.increment_value.Value() != 0 ? params.fence.value : 0)};
+    params.fence.value = syncpoint_manager.IncrementSyncpointMaxExt(channel_syncpoint, increment);
+    gpu.PushGPUEntries(bind_id, std::move(entries));
+
+    if (flags.fence_increment.Value()) {
+        if (flags.suppress_wfi.Value()) {
+            gpu.PushGPUEntries(bind_id,
+                               Tegra::CommandList{BuildIncrementCommandList(params.fence)});
+        } else {
+            gpu.PushGPUEntries(bind_id,
+                               Tegra::CommandList{BuildIncrementWithWfiCommandList(params.fence)});
+        }
+    }
+
+    flags.raw = 0;
+
    std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
    return NvResult::Success;
 }
@@ -328,4 +356,19 @@ NvResult nvhost_gpu::ChannelSetTimeslice(const std::vector<u8>& input, std::vect
    return NvResult::Success;
 }

+Kernel::KEvent* nvhost_gpu::QueryEvent(u32 event_id) {
+    switch (event_id) {
+    case 1:
+        return sm_exception_breakpoint_int_report_event;
+    case 2:
+        return sm_exception_breakpoint_pause_report_event;
+    case 3:
+        return error_notifier_event;
+    default: {
+        LOG_CRITICAL(Service_NVDRV, "Unknown Ctrl GPU Event {}", event_id);
+    }
+    }
+    return nullptr;
+}
+
 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -13,17 +13,31 @@
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "video_core/dma_pusher.h"

-namespace Service::Nvidia {
-class SyncpointManager;
+namespace Tegra {
+namespace Control {
+struct ChannelState;
 }
+} // namespace Tegra
+
+namespace Service::Nvidia {
+
+namespace NvCore {
+class Container;
+class NvMap;
+class SyncpointManager;
+} // namespace NvCore
+
+class EventInterface;
+} // namespace Service::Nvidia

 namespace Service::Nvidia::Devices {

+class nvhost_as_gpu;
 class nvmap;
 class nvhost_gpu final : public nvdevice {
 public:
-    explicit nvhost_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
-                        SyncpointManager& syncpoint_manager_);
+    explicit nvhost_gpu(Core::System& system_, EventInterface& events_interface_,
+                        NvCore::Container& core);
    ~nvhost_gpu() override;

    NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -36,7 +50,10 @@ public:
    void OnOpen(DeviceFD fd) override;
    void OnClose(DeviceFD fd) override;

+    Kernel::KEvent* QueryEvent(u32 event_id) override;
+
 private:
+    friend class nvhost_as_gpu;
    enum class CtxObjects : u32_le {
        Ctx2D = 0x902D,
        Ctx3D = 0xB197,
@@ -146,17 +163,13 @@ private:
        u32_le num_entries{}; // number of fence objects being submitted
        union {
            u32_le raw;
-            BitField<0, 1, u32_le> add_wait;      // append a wait sync_point to the list
-            BitField<1, 1, u32_le> add_increment; // append an increment to the list
-            BitField<2, 1, u32_le> new_hw_format; // mostly ignored
-            BitField<4, 1, u32_le> suppress_wfi;  // suppress wait for interrupt
-            BitField<8, 1, u32_le> increment;     // increment the returned fence
+            BitField<0, 1, u32_le> fence_wait;      // append a wait sync_point to the list
+            BitField<1, 1, u32_le> fence_increment; // append an increment to the list
+            BitField<2, 1, u32_le> new_hw_format;   // mostly ignored
+            BitField<4, 1, u32_le> suppress_wfi;    // suppress wait for interrupt
+            BitField<8, 1, u32_le> increment_value; // increment the returned fence
        } flags;
-        NvFence fence_out{}; // returned new fence object for others to wait on
-
-        u32 AddIncrementValue() const {
-            return flags.add_increment.Value() << 1;
-        }
+        NvFence fence{}; // returned new fence object for others to wait on
    };
    static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(NvFence),
                  "IoctlSubmitGpfifo is incorrect size");
@@ -191,9 +204,18 @@ private:
    NvResult ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);

-    std::shared_ptr<nvmap> nvmap_dev;
-    SyncpointManager& syncpoint_manager;
-    NvFence channel_fence;
+    EventInterface& events_interface;
+    NvCore::Container& core;
+    NvCore::SyncpointManager& syncpoint_manager;
+    NvCore::NvMap& nvmap;
+    std::shared_ptr<Tegra::Control::ChannelState> channel_state;
+    u32 channel_syncpoint;
+    std::mutex channel_mutex;
+
+    // Events
+    Kernel::KEvent* sm_exception_breakpoint_int_report_event;
+    Kernel::KEvent* sm_exception_breakpoint_pause_report_event;
+    Kernel::KEvent* error_notifier_event;
 };

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -5,14 +5,14 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
 #include "video_core/renderer_base.h"

 namespace Service::Nvidia::Devices {

-nvhost_nvdec::nvhost_nvdec(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
-                           SyncpointManager& syncpoint_manager_)
-    : nvhost_nvdec_common{system_, std::move(nvmap_dev_), syncpoint_manager_} {}
+nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core_)
+    : nvhost_nvdec_common{system_, core_, NvCore::ChannelType::NvDec} {}
 nvhost_nvdec::~nvhost_nvdec() = default;

 NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -21,8 +21,9 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>&
    case 0x0:
        switch (command.cmd) {
        case 0x1: {
-            if (!fd_to_id.contains(fd)) {
-                fd_to_id[fd] = next_id++;
+            auto& host1x_file = core.Host1xDeviceFile();
+            if (!host1x_file.fd_to_id.contains(fd)) {
+                host1x_file.fd_to_id[fd] = host1x_file.nvdec_next_id++;
            }
            return Submit(fd, input, output);
        }
@@ -73,8 +74,9 @@ void nvhost_nvdec::OnOpen(DeviceFD fd) {

 void nvhost_nvdec::OnClose(DeviceFD fd) {
    LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
-    const auto iter = fd_to_id.find(fd);
-    if (iter != fd_to_id.end()) {
+    auto& host1x_file = core.Host1xDeviceFile();
+    const auto iter = host1x_file.fd_to_id.find(fd);
+    if (iter != host1x_file.fd_to_id.end()) {
        system.GPU().ClearCdmaInstance(iter->second);
    }
    system.AudioCore().SetNVDECActive(false);
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -10,8 +10,7 @@ namespace Service::Nvidia::Devices {

 class nvhost_nvdec final : public nvhost_nvdec_common {
 public:
-    explicit nvhost_nvdec(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
-                          SyncpointManager& syncpoint_manager_);
+    explicit nvhost_nvdec(Core::System& system_, NvCore::Container& core);
    ~nvhost_nvdec() override;

    NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -23,9 +22,6 @@ public:

    void OnOpen(DeviceFD fd) override;
    void OnClose(DeviceFD fd) override;
-
-private:
-    u32 next_id{};
 };

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -8,10 +8,12 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/core/nvmap.h"
+#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
 #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
-#include "core/hle/service/nvdrv/devices/nvmap.h"
-#include "core/hle/service/nvdrv/syncpoint_manager.h"
 #include "core/memory.h"
+#include "video_core/host1x/host1x.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_base.h"

@@ -44,10 +46,22 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s
 }
 } // Anonymous namespace

-nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
-                                         SyncpointManager& syncpoint_manager_)
-    : nvdevice{system_}, nvmap_dev{std::move(nvmap_dev_)}, syncpoint_manager{syncpoint_manager_} {}
-nvhost_nvdec_common::~nvhost_nvdec_common() = default;
+nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Container& core_,
+                                         NvCore::ChannelType channel_type_)
+    : nvdevice{system_}, core{core_}, syncpoint_manager{core.GetSyncpointManager()},
+      nvmap{core.GetNvMapFile()}, channel_type{channel_type_} {
+    auto& syncpts_accumulated = core.Host1xDeviceFile().syncpts_accumulated;
+    if (syncpts_accumulated.empty()) {
+        channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false);
+    } else {
+        channel_syncpoint = syncpts_accumulated.front();
+        syncpts_accumulated.pop_front();
+    }
+}
+
+nvhost_nvdec_common::~nvhost_nvdec_common() {
+    core.Host1xDeviceFile().syncpts_accumulated.push_back(channel_syncpoint);
+}

 NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
    IoctlSetNvmapFD params{};
@@ -84,16 +98,16 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector<u8>& input,
        for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
            const SyncptIncr& syncpt_incr = syncpt_increments[i];
            fence_thresholds[i] =
-                syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments);
+                syncpoint_manager.IncrementSyncpointMaxExt(syncpt_incr.id, syncpt_incr.increments);
        }
    }
    for (const auto& cmd_buffer : command_buffers) {
-        const auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
+        const auto object = nvmap.GetHandle(cmd_buffer.memory_id);
        ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
        Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
-        system.Memory().ReadBlock(object->addr + cmd_buffer.offset, cmdlist.data(),
+        system.Memory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
                                  cmdlist.size() * sizeof(u32));
-        gpu.PushCommandBuffer(fd_to_id[fd], cmdlist);
+        gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
    }
    std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
    // Some games expect command_buffers to be written back
@@ -112,10 +126,8 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve
    std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
    LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);

-    if (device_syncpoints[params.param] == 0 && system.GPU().UseNvdec()) {
-        device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint();
-    }
-    params.value = device_syncpoints[params.param];
+    // const u32 id{NvCore::SyncpointManager::channel_syncpoints[static_cast<u32>(channel_type)]};
+    params.value = channel_syncpoint;
    std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));

    return NvResult::Success;
@@ -123,6 +135,7 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve

 NvResult nvhost_nvdec_common::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
    IoctlGetWaitbase params{};
+    LOG_CRITICAL(Service_NVDRV, "called WAITBASE");
    std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
    params.value = 0; // Seems to be hard coded at 0
    std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase));
@@ -136,28 +149,8 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto

    SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));

-    auto& gpu = system.GPU();
-
    for (auto& cmd_buffer : cmd_buffer_handles) {
-        auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)};
-        if (!object) {
-            LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle);
-            std::memcpy(output.data(), &params, output.size());
-            return NvResult::InvalidState;
-        }
-        if (object->dma_map_addr == 0) {
-            // NVDEC and VIC memory is in the 32-bit address space
-            // MapAllocate32 will attempt to map a lower 32-bit value in the shared gpu memory space
-            const GPUVAddr low_addr = gpu.MemoryManager().MapAllocate32(object->addr, object->size);
-            object->dma_map_addr = static_cast<u32>(low_addr);
-            // Ensure that the dma_map_addr is indeed in the lower 32-bit address space.
-            ASSERT(object->dma_map_addr == low_addr);
-        }
-        if (!object->dma_map_addr) {
-            LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size);
-        } else {
-            cmd_buffer.map_address = object->dma_map_addr;
-        }
+        cmd_buffer.map_address = nvmap.PinHandle(cmd_buffer.map_handle);
    }
    std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
    std::memcpy(output.data() + sizeof(IoctlMapBuffer), cmd_buffer_handles.data(),
@@ -167,11 +160,16 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
 }

 NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
-    // This is intntionally stubbed.
-    // Skip unmapping buffers here, as to not break the continuity of the VP9 reference frame
-    // addresses, and risk invalidating data before the async GPU thread is done with it
+    IoctlMapBuffer params{};
+    std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
+    std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
+
+    SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
+    for (auto& cmd_buffer : cmd_buffer_handles) {
+        nvmap.UnpinHandle(cmd_buffer.map_handle);
+    }
+
    std::memset(output.data(), 0, output.size());
-    LOG_DEBUG(Service_NVDRV, "(STUBBED) called");
    return NvResult::Success;
 }

@@ -182,4 +180,9 @@ NvResult nvhost_nvdec_common::SetSubmitTimeout(const std::vector<u8>& input,
    return NvResult::Success;
 }

+Kernel::KEvent* nvhost_nvdec_common::QueryEvent(u32 event_id) {
+    LOG_CRITICAL(Service_NVDRV, "Unknown HOSTX1 Event {}", event_id);
+    return nullptr;
+}
+
 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -3,21 +3,26 @@

 #pragma once

+#include <deque>
 #include <vector>
 #include "common/common_types.h"
 #include "common/swap.h"
+#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"

 namespace Service::Nvidia {
-class SyncpointManager;
+
+namespace NvCore {
+class Container;
+class NvMap;
+} // namespace NvCore

 namespace Devices {
-class nvmap;

 class nvhost_nvdec_common : public nvdevice {
 public:
-    explicit nvhost_nvdec_common(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
-                                 SyncpointManager& syncpoint_manager_);
+    explicit nvhost_nvdec_common(Core::System& system_, NvCore::Container& core,
+                                 NvCore::ChannelType channel_type);
    ~nvhost_nvdec_common() override;

 protected:
@@ -110,11 +115,15 @@ protected:
    NvResult UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output);

-    std::unordered_map<DeviceFD, u32> fd_to_id{};
+    Kernel::KEvent* QueryEvent(u32 event_id) override;
+
+    u32 channel_syncpoint;
    s32_le nvmap_fd{};
    u32_le submit_timeout{};
-    std::shared_ptr<nvmap> nvmap_dev;
-    SyncpointManager& syncpoint_manager;
+    NvCore::Container& core;
+    NvCore::SyncpointManager& syncpoint_manager;
+    NvCore::NvMap& nvmap;
+    NvCore::ChannelType channel_type;
    std::array<u32, MaxSyncPoints> device_syncpoints{};
 };
 }; // namespace Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -4,13 +4,14 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/devices/nvhost_vic.h"
 #include "video_core/renderer_base.h"

 namespace Service::Nvidia::Devices {
-nvhost_vic::nvhost_vic(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
-                       SyncpointManager& syncpoint_manager_)
-    : nvhost_nvdec_common{system_, std::move(nvmap_dev_), syncpoint_manager_} {}
+
+nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core_)
+    : nvhost_nvdec_common{system_, core_, NvCore::ChannelType::VIC} {}

 nvhost_vic::~nvhost_vic() = default;

@@ -19,11 +20,13 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& i
    switch (command.group) {
    case 0x0:
        switch (command.cmd) {
-        case 0x1:
-            if (!fd_to_id.contains(fd)) {
-                fd_to_id[fd] = next_id++;
+        case 0x1: {
+            auto& host1x_file = core.Host1xDeviceFile();
+            if (!host1x_file.fd_to_id.contains(fd)) {
+                host1x_file.fd_to_id[fd] = host1x_file.vic_next_id++;
            }
            return Submit(fd, input, output);
+        }
        case 0x2:
            return GetSyncpoint(input, output);
        case 0x3:
@@ -67,8 +70,9 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& i
 void nvhost_vic::OnOpen(DeviceFD fd) {}

 void nvhost_vic::OnClose(DeviceFD fd) {
-    const auto iter = fd_to_id.find(fd);
-    if (iter != fd_to_id.end()) {
+    auto& host1x_file = core.Host1xDeviceFile();
+    const auto iter = host1x_file.fd_to_id.find(fd);
+    if (iter != host1x_file.fd_to_id.end()) {
        system.GPU().ClearCdmaInstance(iter->second);
    }
 }
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -9,8 +9,7 @@ namespace Service::Nvidia::Devices {

 class nvhost_vic final : public nvhost_nvdec_common {
 public:
-    explicit nvhost_vic(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
-                        SyncpointManager& syncpoint_manager_);
+    explicit nvhost_vic(Core::System& system_, NvCore::Container& core);
    ~nvhost_vic();

    NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -22,8 +21,5 @@ public:

    void OnOpen(DeviceFD fd) override;
    void OnClose(DeviceFD fd) override;
-
-private:
-    u32 next_id{};
 };
 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -2,19 +2,26 @@
 // SPDX-License-Identifier: GPL-2.0-or-later

 #include <algorithm>
+#include <bit>
 #include <cstring>

+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
+#include "core/hle/kernel/k_page_table.h"
+#include "core/hle/kernel/k_process.h"
+#include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvmap.h"
+#include "core/memory.h"
+
+using Core::Memory::YUZU_PAGESIZE;

 namespace Service::Nvidia::Devices {

-nvmap::nvmap(Core::System& system_) : nvdevice{system_} {
-    // Handle 0 appears to be used when remapping, so we create a placeholder empty nvmap object to
-    // represent this.
-    CreateObject(0);
-}
+nvmap::nvmap(Core::System& system_, NvCore::Container& container_)
+    : nvdevice{system_}, container{container_}, file{container.GetNvMapFile()} {}

 nvmap::~nvmap() = default;

@@ -62,39 +69,21 @@ NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
 void nvmap::OnOpen(DeviceFD fd) {}
 void nvmap::OnClose(DeviceFD fd) {}

-VAddr nvmap::GetObjectAddress(u32 handle) const {
-    auto object = GetObject(handle);
-    ASSERT(object);
-    ASSERT(object->status == Object::Status::Allocated);
-    return object->addr;
-}
-
-u32 nvmap::CreateObject(u32 size) {
-    // Create a new nvmap object and obtain a handle to it.
-    auto object = std::make_shared<Object>();
-    object->id = next_id++;
-    object->size = size;
-    object->status = Object::Status::Created;
-    object->refcount = 1;
-
-    const u32 handle = next_handle++;
-
-    handles.insert_or_assign(handle, std::move(object));
-
-    return handle;
-}
-
 NvResult nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
    IocCreateParams params;
    std::memcpy(&params, input.data(), sizeof(params));
-    LOG_DEBUG(Service_NVDRV, "size=0x{:08X}", params.size);
+    LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size);

-    if (!params.size) {
-        LOG_ERROR(Service_NVDRV, "Size is 0");
-        return NvResult::BadValue;
+    std::shared_ptr<NvCore::NvMap::Handle> handle_description{};
+    auto result =
+        file.CreateHandle(Common::AlignUp(params.size, YUZU_PAGESIZE), handle_description);
+    if (result != NvResult::Success) {
+        LOG_CRITICAL(Service_NVDRV, "Failed to create Object");
+        return result;
    }
-
-    params.handle = CreateObject(params.size);
+    handle_description->orig_size = params.size; // Orig size is the unaligned size
+    params.handle = handle_description->id;
+    LOG_DEBUG(Service_NVDRV, "handle: {}, size: 0x{:X}", handle_description->id, params.size);

    std::memcpy(output.data(), &params, sizeof(params));
    return NvResult::Success;
@@ -103,63 +92,68 @@ NvResult nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output)
 NvResult nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
    IocAllocParams params;
    std::memcpy(&params, input.data(), sizeof(params));
-    LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.addr);
+    LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address);

    if (!params.handle) {
-        LOG_ERROR(Service_NVDRV, "Handle is 0");
+        LOG_CRITICAL(Service_NVDRV, "Handle is 0");
        return NvResult::BadValue;
    }

    if ((params.align - 1) & params.align) {
-        LOG_ERROR(Service_NVDRV, "Incorrect alignment used, alignment={:08X}", params.align);
+        LOG_CRITICAL(Service_NVDRV, "Incorrect alignment used, alignment={:08X}", params.align);
        return NvResult::BadValue;
    }

-    const u32 min_alignment = 0x1000;
-    if (params.align < min_alignment) {
-        params.align = min_alignment;
+    // Force page size alignment at a minimum
+    if (params.align < YUZU_PAGESIZE) {
+        params.align = YUZU_PAGESIZE;
    }

-    auto object = GetObject(params.handle);
-    if (!object) {
-        LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
+    auto handle_description{file.GetHandle(params.handle)};
+    if (!handle_description) {
+        LOG_CRITICAL(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
        return NvResult::BadValue;
    }

-    if (object->status == Object::Status::Allocated) {
-        LOG_ERROR(Service_NVDRV, "Object is already allocated, handle={:08X}", params.handle);
+    if (handle_description->allocated) {
+        LOG_CRITICAL(Service_NVDRV, "Object is already allocated, handle={:08X}", params.handle);
        return NvResult::InsufficientMemory;
    }

-    object->flags = params.flags;
-    object->align = params.align;
-    object->kind = params.kind;
-    object->addr = params.addr;
-    object->status = Object::Status::Allocated;
-
+    const auto result =
+        handle_description->Alloc(params.flags, params.align, params.kind, params.address);
+    if (result != NvResult::Success) {
+        LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle);
+        return result;
+    }
+    ASSERT(system.CurrentProcess()
+               ->PageTable()
+               .LockForDeviceAddressSpace(handle_description->address, handle_description->size)
+               .IsSuccess());
    std::memcpy(output.data(), &params, sizeof(params));
-    return NvResult::Success;
+    return result;
 }

 NvResult nvmap::IocGetId(const std::vector<u8>& input, std::vector<u8>& output) {
    IocGetIdParams params;
    std::memcpy(&params, input.data(), sizeof(params));

-    LOG_WARNING(Service_NVDRV, "called");
+    LOG_DEBUG(Service_NVDRV, "called");

+    // See the comment in FromId for extra info on this function
    if (!params.handle) {
-        LOG_ERROR(Service_NVDRV, "Handle is zero");
+        LOG_CRITICAL(Service_NVDRV, "Error!");
        return NvResult::BadValue;
    }

-    auto object = GetObject(params.handle);
-    if (!object) {
-        LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
-        return NvResult::BadValue;
+    auto handle_description{file.GetHandle(params.handle)};
+    if (!handle_description) {
+        LOG_CRITICAL(Service_NVDRV, "Error!");
+        return NvResult::AccessDenied; // This will always return EPERM irrespective of if the
+                                       // handle exists or not
    }

-    params.id = object->id;
-
+    params.id = handle_description->id;
    std::memcpy(output.data(), &params, sizeof(params));
    return NvResult::Success;
 }
@@ -168,26 +162,29 @@ NvResult nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output)
    IocFromIdParams params;
    std::memcpy(&params, input.data(), sizeof(params));

-    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
+    LOG_DEBUG(Service_NVDRV, "called, id:{}", params.id);

-    auto itr = std::find_if(handles.begin(), handles.end(),
-                            [&](const auto& entry) { return entry.second->id == params.id; });
-    if (itr == handles.end()) {
-        LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
+    // Handles and IDs are always the same value in nvmap; however, IDs can be used globally given
+    // the right permissions.
+    // Since we don't plan on ever supporting multiprocess, we can skip implementing handle refs, so
+    // this function just does simple validation and passes through the handle id.
+    if (!params.id) {
+        LOG_CRITICAL(Service_NVDRV, "Zero Id is invalid!");
        return NvResult::BadValue;
    }

-    auto& object = itr->second;
-    if (object->status != Object::Status::Allocated) {
-        LOG_ERROR(Service_NVDRV, "Object is not allocated, handle={:08X}", params.handle);
+    auto handle_description{file.GetHandle(params.id)};
+    if (!handle_description) {
+        LOG_CRITICAL(Service_NVDRV, "Unregistered handle!");
        return NvResult::BadValue;
    }

-    itr->second->refcount++;
-
-    // Return the existing handle instead of creating a new one.
-    params.handle = itr->first;
-
+    auto result = handle_description->Duplicate(false);
+    if (result != NvResult::Success) {
+        LOG_CRITICAL(Service_NVDRV, "Could not duplicate handle!");
+        return result;
+    }
+    params.handle = handle_description->id;
    std::memcpy(output.data(), &params, sizeof(params));
    return NvResult::Success;
 }
@@ -198,35 +195,43 @@ NvResult nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output)
    IocParamParams params;
    std::memcpy(&params, input.data(), sizeof(params));

-    LOG_DEBUG(Service_NVDRV, "(STUBBED) called type={}", params.param);
+    LOG_DEBUG(Service_NVDRV, "called type={}", params.param);

-    auto object = GetObject(params.handle);
-    if (!object) {
-        LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
+    if (!params.handle) {
+        LOG_CRITICAL(Service_NVDRV, "Invalid handle!");
        return NvResult::BadValue;
    }

-    if (object->status != Object::Status::Allocated) {
-        LOG_ERROR(Service_NVDRV, "Object is not allocated, handle={:08X}", params.handle);
+    auto handle_description{file.GetHandle(params.handle)};
+    if (!handle_description) {
+        LOG_CRITICAL(Service_NVDRV, "Not registered handle!");
        return NvResult::BadValue;
    }

-    switch (static_cast<ParamTypes>(params.param)) {
-    case ParamTypes::Size:
-        params.result = object->size;
+    switch (params.param) {
+    case HandleParameterType::Size:
+        params.result = static_cast<u32_le>(handle_description->orig_size);
        break;
-    case ParamTypes::Alignment:
-        params.result = object->align;
+    case HandleParameterType::Alignment:
+        params.result = static_cast<u32_le>(handle_description->align);
        break;
-    case ParamTypes::Heap:
-        // TODO(Subv): Seems to be a hardcoded value?
-        params.result = 0x40000000;
+    case HandleParameterType::Base:
+        params.result = static_cast<u32_le>(-22); // posix EINVAL
        break;
-    case ParamTypes::Kind:
-        params.result = object->kind;
+    case HandleParameterType::Heap:
+        if (handle_description->allocated)
+            params.result = 0x40000000;
+        else
+            params.result = 0;
+        break;
+    case HandleParameterType::Kind:
+        params.result = handle_description->kind;
+        break;
+    case HandleParameterType::IsSharedMemMapped:
+        params.result = handle_description->is_shared_mem_mapped;
        break;
    default:
-        UNIMPLEMENTED();
+        return NvResult::BadValue;
    }

    std::memcpy(output.data(), &params, sizeof(params));
@@ -234,46 +239,29 @@ NvResult nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output)
 }

 NvResult nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
-    // TODO(Subv): These flags are unconfirmed.
-    enum FreeFlags {
-        Freed = 0,
-        NotFreedYet = 1,
-    };
-
    IocFreeParams params;
    std::memcpy(&params, input.data(), sizeof(params));

-    LOG_DEBUG(Service_NVDRV, "(STUBBED) called");
+    LOG_DEBUG(Service_NVDRV, "called");

-    auto itr = handles.find(params.handle);
-    if (itr == handles.end()) {
-        LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
-        return NvResult::BadValue;
-    }
-    if (!itr->second->refcount) {
-        LOG_ERROR(
-            Service_NVDRV,
-            "There is no references to this object. The object is already freed. handle={:08X}",
-            params.handle);
-        return NvResult::BadValue;
+    if (!params.handle) {
+        LOG_CRITICAL(Service_NVDRV, "Handle null freed?");
+        return NvResult::Success;
    }

-    itr->second->refcount--;
-
-    params.size = itr->second->size;
-
-    if (itr->second->refcount == 0) {
-        params.flags = Freed;
-        // The address of the nvmap is written to the output if we're finally freeing it, otherwise
-        // 0 is written.
-        params.address = itr->second->addr;
+    if (auto freeInfo{file.FreeHandle(params.handle, false)}) {
+        ASSERT(system.CurrentProcess()
+                   ->PageTable()
+                   .UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size)
+                   .IsSuccess());
+        params.address = freeInfo->address;
+        params.size = static_cast<u32>(freeInfo->size);
+        params.flags.raw = 0;
+        params.flags.map_uncached.Assign(freeInfo->was_uncached);
    } else {
-        params.flags = NotFreedYet;
-        params.address = 0;
+        // This is possible when there are internal dups or other duplicates.
    }

-    handles.erase(params.handle);
-
    std::memcpy(output.data(), &params, sizeof(params));
    return NvResult::Success;
 }
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -9,15 +9,23 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
+#include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"

+namespace Service::Nvidia::NvCore {
+class Container;
+} // namespace Service::Nvidia::NvCore
+
 namespace Service::Nvidia::Devices {

 class nvmap final : public nvdevice {
 public:
-    explicit nvmap(Core::System& system_);
+    explicit nvmap(Core::System& system_, NvCore::Container& container);
    ~nvmap() override;

+    nvmap(const nvmap&) = delete;
+    nvmap& operator=(const nvmap&) = delete;
+
    NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
                    std::vector<u8>& output) override;
    NvResult Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -28,31 +36,15 @@ public:
    void OnOpen(DeviceFD fd) override;
    void OnClose(DeviceFD fd) override;

-    /// Returns the allocated address of an nvmap object given its handle.
-    VAddr GetObjectAddress(u32 handle) const;
-
-    /// Represents an nvmap object.
-    struct Object {
-        enum class Status { Created, Allocated };
-        u32 id;
-        u32 size;
-        u32 flags;
-        u32 align;
-        u8 kind;
-        VAddr addr;
-        Status status;
-        u32 refcount;
-        u32 dma_map_addr;
+    enum class HandleParameterType : u32_le {
+        Size = 1,
+        Alignment = 2,
+        Base = 3,
+        Heap = 4,
+        Kind = 5,
+        IsSharedMemMapped = 6
    };

-    std::shared_ptr<Object> GetObject(u32 handle) const {
-        auto itr = handles.find(handle);
-        if (itr != handles.end()) {
-            return itr->second;
-        }
-        return {};
-    }
-
 private:
    /// Id to use for the next handle that is created.
    u32 next_handle = 0;
@@ -60,9 +52,6 @@ private:
    /// Id to use for the next object that is created.
    u32 next_id = 0;

-    /// Mapping of currently allocated handles to the objects they represent.
-    std::unordered_map<u32, std::shared_ptr<Object>> handles;
-
    struct IocCreateParams {
        // Input
        u32_le size{};
@@ -83,11 +72,11 @@ private:
        // Input
        u32_le handle{};
        u32_le heap_mask{};
-        u32_le flags{};
+        NvCore::NvMap::Handle::Flags flags{};
        u32_le align{};
        u8 kind{};
        INSERT_PADDING_BYTES(7);
-        u64_le addr{};
+        u64_le address{};
    };
    static_assert(sizeof(IocAllocParams) == 32, "IocAllocParams has wrong size");

@@ -96,14 +85,14 @@ private:
        INSERT_PADDING_BYTES(4);
        u64_le address{};
        u32_le size{};
-        u32_le flags{};
+        NvCore::NvMap::Handle::Flags flags{};
    };
    static_assert(sizeof(IocFreeParams) == 24, "IocFreeParams has wrong size");

    struct IocParamParams {
        // Input
        u32_le handle{};
-        u32_le param{};
+        HandleParameterType param{};
        // Output
        u32_le result{};
    };
@@ -117,14 +106,15 @@ private:
    };
    static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");

-    u32 CreateObject(u32 size);
-
    NvResult IocCreate(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult IocAlloc(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult IocGetId(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult IocFromId(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult IocParam(const std::vector<u8>& input, std::vector<u8>& output);
    NvResult IocFree(const std::vector<u8>& input, std::vector<u8>& output);
+
+    NvCore::Container& container;
+    NvCore::NvMap& file;
 };

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/nvdata.h
+++ b/src/core/hle/service/nvdrv/nvdata.h
@@ -1,5 +1,6 @@
-// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later

 #pragma once

@@ -78,11 +79,15 @@ enum class NvResult : u32 {
    ModuleNotPresent = 0xA000E,
 };

+// Event states, obtained from Skyline's nvhost ctrl device header:
+// https://github.com/skyline-emu/skyline/blob/nvdec-dev/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.h#L47
 enum class EventState {
-    Free = 0,
-    Registered = 1,
-    Waiting = 2,
-    Busy = 3,
+    Available = 0,
+    Waiting = 1,
+    Cancelling = 2,
+    Signalling = 3,
+    Signalled = 4,
+    Cancelled = 5,
 };

 union Ioctl {
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -1,5 +1,6 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later

 #include <utility>

@@ -8,6 +9,7 @@
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/k_event.h"
 #include "core/hle/kernel/k_writable_event.h"
+#include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
 #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
 #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
@@ -15,17 +17,31 @@
 #include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
 #include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
+#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
 #include "core/hle/service/nvdrv/devices/nvhost_nvjpg.h"
 #include "core/hle/service/nvdrv/devices/nvhost_vic.h"
 #include "core/hle/service/nvdrv/devices/nvmap.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/hle/service/nvdrv/nvdrv_interface.h"
 #include "core/hle/service/nvdrv/nvmemp.h"
-#include "core/hle/service/nvdrv/syncpoint_manager.h"
 #include "core/hle/service/nvflinger/nvflinger.h"
+#include "video_core/gpu.h"

 namespace Service::Nvidia {

+EventInterface::EventInterface(Module& module_) : module{module_}, guard{}, on_signal{} {}
+
+EventInterface::~EventInterface() = default;
+
+Kernel::KEvent* EventInterface::CreateEvent(std::string name) {
+    // Events come from the owning module's service context; FreeEvent() closes them there too.
+    return module.service_context.CreateEvent(std::move(name));
+}
+
+void EventInterface::FreeEvent(Kernel::KEvent* event) {
+    module.service_context.CloseEvent(event);
+}
+
 void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger,
                       Core::System& system) {
    auto module_ = std::make_shared<Module>(system);
@@ -38,34 +54,54 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
 }

 Module::Module(Core::System& system)
-    : syncpoint_manager{system.GPU()}, service_context{system, "nvdrv"} {
-    for (u32 i = 0; i < MaxNvEvents; i++) {
-        events_interface.events[i].event =
-            service_context.CreateEvent(fmt::format("NVDRV::NvEvent_{}", i));
-        events_interface.status[i] = EventState::Free;
-        events_interface.registered[i] = false;
-    }
-    auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
-    devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
-    devices["/dev/nvhost-gpu"] =
-        std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager);
-    devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
-    devices["/dev/nvmap"] = nvmap_dev;
-    devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
-    devices["/dev/nvhost-ctrl"] =
-        std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
-    devices["/dev/nvhost-nvdec"] =
-        std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev, syncpoint_manager);
-    devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
-    devices["/dev/nvhost-vic"] =
-        std::make_shared<Devices::nvhost_vic>(system, nvmap_dev, syncpoint_manager);
+    : service_context{system, "nvdrv"}, events_interface{*this}, container{system.Host1x()} {
+    builders["/dev/nvhost-as-gpu"] = [this, &system](DeviceFD fd) {
+        std::shared_ptr<Devices::nvdevice> device =
+            std::make_shared<Devices::nvhost_as_gpu>(system, *this, container);
+        return open_files.emplace(fd, device).first;
+    };
+    builders["/dev/nvhost-gpu"] = [this, &system](DeviceFD fd) {
+        std::shared_ptr<Devices::nvdevice> device =
+            std::make_shared<Devices::nvhost_gpu>(system, events_interface, container);
+        return open_files.emplace(fd, device).first;
+    };
+    builders["/dev/nvhost-ctrl-gpu"] = [this, &system](DeviceFD fd) {
+        std::shared_ptr<Devices::nvdevice> device =
+            std::make_shared<Devices::nvhost_ctrl_gpu>(system, events_interface);
+        return open_files.emplace(fd, device).first;
+    };
+    builders["/dev/nvmap"] = [this, &system](DeviceFD fd) {
+        std::shared_ptr<Devices::nvdevice> device =
+            std::make_shared<Devices::nvmap>(system, container);
+        return open_files.emplace(fd, device).first;
+    };
+    builders["/dev/nvdisp_disp0"] = [this, &system](DeviceFD fd) {
+        std::shared_ptr<Devices::nvdevice> device =
+            std::make_shared<Devices::nvdisp_disp0>(system, container);
+        return open_files.emplace(fd, device).first;
+    };
+    builders["/dev/nvhost-ctrl"] = [this, &system](DeviceFD fd) {
+        std::shared_ptr<Devices::nvdevice> device =
+            std::make_shared<Devices::nvhost_ctrl>(system, events_interface, container);
+        return open_files.emplace(fd, device).first;
+    };
+    builders["/dev/nvhost-nvdec"] = [this, &system](DeviceFD fd) {
+        std::shared_ptr<Devices::nvdevice> device =
+            std::make_shared<Devices::nvhost_nvdec>(system, container);
+        return open_files.emplace(fd, device).first;
+    };
+    builders["/dev/nvhost-nvjpg"] = [this, &system](DeviceFD fd) {
+        std::shared_ptr<Devices::nvdevice> device = std::make_shared<Devices::nvhost_nvjpg>(system);
+        return open_files.emplace(fd, device).first;
+    };
+    builders["/dev/nvhost-vic"] = [this, &system](DeviceFD fd) {
+        std::shared_ptr<Devices::nvdevice> device =
+            std::make_shared<Devices::nvhost_vic>(system, container);
+        return open_files.emplace(fd, device).first;
+    };
 }

-Module::~Module() {
-    for (u32 i = 0; i < MaxNvEvents; i++) {
-        service_context.CloseEvent(events_interface.events[i].event);
-    }
-}
+Module::~Module() = default;

 NvResult Module::VerifyFD(DeviceFD fd) const {
    if (fd < 0) {
@@ -82,18 +118,18 @@ NvResult Module::VerifyFD(DeviceFD fd) const {
 }

 DeviceFD Module::Open(const std::string& device_name) {
-    if (devices.find(device_name) == devices.end()) {
+    auto it = builders.find(device_name);
+    if (it == builders.end()) {
        LOG_ERROR(Service_NVDRV, "Trying to open unknown device {}", device_name);
        return INVALID_NVDRV_FD;
    }

-    auto device = devices[device_name];
    const DeviceFD fd = next_fd++;
+    auto& builder = it->second;
+    auto device = builder(fd)->second;

    device->OnOpen(fd);

-    open_files[fd] = std::move(device);
-
    return fd;
 }

@@ -168,22 +204,24 @@ NvResult Module::Close(DeviceFD fd) {
    return NvResult::Success;
 }

-void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
-    for (u32 i = 0; i < MaxNvEvents; i++) {
-        if (events_interface.assigned_syncpt[i] == syncpoint_id &&
-            events_interface.assigned_value[i] == value) {
-            events_interface.LiberateEvent(i);
-            events_interface.events[i].event->GetWritableEvent().Signal();
-        }
+NvResult Module::QueryEvent(DeviceFD fd, u32 event_id, Kernel::KEvent*& event) {
+    if (fd < 0) {
+        LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
+        return NvResult::InvalidState;
    }
-}

-Kernel::KReadableEvent& Module::GetEvent(const u32 event_id) {
-    return events_interface.events[event_id].event->GetReadableEvent();
-}
+    const auto itr = open_files.find(fd);

-Kernel::KWritableEvent& Module::GetEventWriteable(const u32 event_id) {
-    return events_interface.events[event_id].event->GetWritableEvent();
+    if (itr == open_files.end()) {
+        LOG_ERROR(Service_NVDRV, "Could not find DeviceFD={}!", fd);
+        return NvResult::NotImplemented;
+    }
+
+    event = itr->second->QueryEvent(event_id);
+    if (!event) {
+        return NvResult::BadParameter;
+    }
+    return NvResult::Success;
 }

 } // namespace Service::Nvidia
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -1,16 +1,21 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later

 #pragma once

+#include <functional>
+#include <list>
 #include <memory>
+#include <mutex>
+#include <string>
 #include <unordered_map>
 #include <vector>

 #include "common/common_types.h"
 #include "core/hle/service/kernel_helpers.h"
+#include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/nvdata.h"
-#include "core/hle/service/nvdrv/syncpoint_manager.h"
 #include "core/hle/service/nvflinger/ui/fence.h"
 #include "core/hle/service/service.h"

@@ -28,81 +32,31 @@ class NVFlinger;

 namespace Service::Nvidia {

+namespace NvCore {
+class Container;
 class SyncpointManager;
+} // namespace NvCore

 namespace Devices {
 class nvdevice;
-}
+class nvhost_ctrl;
+} // namespace Devices

-/// Represents an Nvidia event
-struct NvEvent {
-    Kernel::KEvent* event{};
-    NvFence fence{};
-};
+class Module;

-struct EventInterface {
-    // Mask representing currently busy events
-    u64 events_mask{};
-    // Each kernel event associated to an NV event
-    std::array<NvEvent, MaxNvEvents> events;
-    // The status of the current NVEvent
-    std::array<EventState, MaxNvEvents> status{};
-    // Tells if an NVEvent is registered or not
-    std::array<bool, MaxNvEvents> registered{};
-    // Tells the NVEvent that it has failed.
-    std::array<bool, MaxNvEvents> failed{};
-    // When an NVEvent is waiting on GPU interrupt, this is the sync_point
-    // associated with it.
-    std::array<u32, MaxNvEvents> assigned_syncpt{};
-    // This is the value of the GPU interrupt for which the NVEvent is waiting
-    // for.
-    std::array<u32, MaxNvEvents> assigned_value{};
-    // Constant to denote an unasigned syncpoint.
-    static constexpr u32 unassigned_syncpt = 0xFFFFFFFF;
-    std::optional<u32> GetFreeEvent() const {
-        u64 mask = events_mask;
-        for (u32 i = 0; i < MaxNvEvents; i++) {
-            const bool is_free = (mask & 0x1) == 0;
-            if (is_free) {
-                if (status[i] == EventState::Registered || status[i] == EventState::Free) {
-                    return {i};
-                }
-            }
-            mask = mask >> 1;
-        }
-        return std::nullopt;
-    }
-    void SetEventStatus(const u32 event_id, EventState new_status) {
-        EventState old_status = status[event_id];
-        if (old_status == new_status) {
-            return;
-        }
-        status[event_id] = new_status;
-        if (new_status == EventState::Registered) {
-            registered[event_id] = true;
-        }
-        if (new_status == EventState::Waiting || new_status == EventState::Busy) {
-            events_mask |= (1ULL << event_id);
-        }
-    }
-    void RegisterEvent(const u32 event_id) {
-        registered[event_id] = true;
-        if (status[event_id] == EventState::Free) {
-            status[event_id] = EventState::Registered;
-        }
-    }
-    void UnregisterEvent(const u32 event_id) {
-        registered[event_id] = false;
-        if (status[event_id] == EventState::Registered) {
-            status[event_id] = EventState::Free;
-        }
-    }
-    void LiberateEvent(const u32 event_id) {
-        status[event_id] = registered[event_id] ? EventState::Registered : EventState::Free;
-        events_mask &= ~(1ULL << event_id);
-        assigned_syncpt[event_id] = unassigned_syncpt;
-        assigned_value[event_id] = 0;
-    }
+class EventInterface {
+public:
+    explicit EventInterface(Module& module_);
+    ~EventInterface();
+
+    Kernel::KEvent* CreateEvent(std::string name);
+
+    void FreeEvent(Kernel::KEvent* event);
+
+private:
+    Module& module;
+    std::mutex guard;
+    std::list<Devices::nvhost_ctrl*> on_signal;
 };

 class Module final {
@@ -112,9 +66,9 @@ public:

    /// Returns a pointer to one of the available devices, identified by its name.
    template <typename T>
-    std::shared_ptr<T> GetDevice(const std::string& name) {
-        auto itr = devices.find(name);
-        if (itr == devices.end())
+    std::shared_ptr<T> GetDevice(DeviceFD fd) {
+        auto itr = open_files.find(fd);
+        if (itr == open_files.end())
            return nullptr;
        return std::static_pointer_cast<T>(itr->second);
    }
@@ -137,28 +91,27 @@ public:
    /// Closes a device file descriptor and returns operation success.
    NvResult Close(DeviceFD fd);

-    void SignalSyncpt(const u32 syncpoint_id, const u32 value);
-
-    Kernel::KReadableEvent& GetEvent(u32 event_id);
-
-    Kernel::KWritableEvent& GetEventWriteable(u32 event_id);
+    NvResult QueryEvent(DeviceFD fd, u32 event_id, Kernel::KEvent*& event);

 private:
-    /// Manages syncpoints on the host
-    SyncpointManager syncpoint_manager;
+    friend class EventInterface;
+    friend class Service::NVFlinger::NVFlinger;

    /// Id to use for the next open file descriptor.
    DeviceFD next_fd = 1;

+    using FilesContainerType = std::unordered_map<DeviceFD, std::shared_ptr<Devices::nvdevice>>;
    /// Mapping of file descriptors to the devices they reference.
-    std::unordered_map<DeviceFD, std::shared_ptr<Devices::nvdevice>> open_files;
+    FilesContainerType open_files;

-    /// Mapping of device node names to their implementation.
-    std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices;
+    KernelHelpers::ServiceContext service_context;

    EventInterface events_interface;

-    KernelHelpers::ServiceContext service_context;
+    /// Manages syncpoints on the host
+    NvCore::Container container;
+
+    std::unordered_map<std::string, std::function<FilesContainerType::iterator(DeviceFD)>> builders;
 };

 /// Registers all NVDRV services with the specified service manager.
--- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -1,10 +1,12 @@
-// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
+// SPDX-License-Identifier: GPL-3.0-or-later

 #include <cinttypes>
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/k_event.h"
 #include "core/hle/kernel/k_readable_event.h"
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
@@ -12,10 +14,6 @@

 namespace Service::Nvidia {

-void NVDRV::SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
-    nvdrv->SignalSyncpt(syncpoint_id, value);
-}
-
 void NVDRV::Open(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_NVDRV, "called");
    IPC::ResponseBuilder rb{ctx, 4};
@@ -164,8 +162,7 @@ void NVDRV::Initialize(Kernel::HLERequestContext& ctx) {
 void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp{ctx};
    const auto fd = rp.Pop<DeviceFD>();
-    const auto event_id = rp.Pop<u32>() & 0x00FF;
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id);
+    const auto event_id = rp.Pop<u32>();

    if (!is_initialized) {
        ServiceError(ctx, NvResult::NotInitialized);
@@ -173,24 +170,20 @@ void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
        return;
    }

-    const auto nv_result = nvdrv->VerifyFD(fd);
-    if (nv_result != NvResult::Success) {
-        LOG_ERROR(Service_NVDRV, "Invalid FD specified DeviceFD={}!", fd);
-        ServiceError(ctx, nv_result);
-        return;
-    }
+    Kernel::KEvent* event = nullptr;
+    NvResult result = nvdrv->QueryEvent(fd, event_id, event);

-    if (event_id < MaxNvEvents) {
+    if (result == NvResult::Success) {
        IPC::ResponseBuilder rb{ctx, 3, 1};
        rb.Push(ResultSuccess);
-        auto& event = nvdrv->GetEvent(event_id);
-        event.Clear();
-        rb.PushCopyObjects(event);
+        auto& readable_event = event->GetReadableEvent();
+        rb.PushCopyObjects(readable_event);
        rb.PushEnum(NvResult::Success);
    } else {
+        LOG_ERROR(Service_NVDRV, "Invalid event request!");
        IPC::ResponseBuilder rb{ctx, 3};
        rb.Push(ResultSuccess);
-        rb.PushEnum(NvResult::BadParameter);
+        rb.PushEnum(result);
    }
 }

--- a/src/core/hle/service/nvdrv/nvdrv_interface.h
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.h
@@ -18,8 +18,6 @@ public:
    explicit NVDRV(Core::System& system_, std::shared_ptr<Module> nvdrv_, const char* name);
    ~NVDRV() override;

-    void SignalGPUInterruptSyncpt(u32 syncpoint_id, u32 value);
-
 private:
    void Open(Kernel::HLERequestContext& ctx);
    void Ioctl1(Kernel::HLERequestContext& ctx);
--- a/src/core/hle/service/nvdrv/syncpoint_manager.cpp
+++ b/src/core/hle/service/nvdrv/syncpoint_manager.cpp
@@ -1,38 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include "common/assert.h"
-#include "core/hle/service/nvdrv/syncpoint_manager.h"
-#include "video_core/gpu.h"
-
-namespace Service::Nvidia {
-
-SyncpointManager::SyncpointManager(Tegra::GPU& gpu_) : gpu{gpu_} {}
-
-SyncpointManager::~SyncpointManager() = default;
-
-u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) {
-    syncpoints[syncpoint_id].min = gpu.GetSyncpointValue(syncpoint_id);
-    return GetSyncpointMin(syncpoint_id);
-}
-
-u32 SyncpointManager::AllocateSyncpoint() {
-    for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) {
-        if (!syncpoints[syncpoint_id].is_allocated) {
-            syncpoints[syncpoint_id].is_allocated = true;
-            return syncpoint_id;
-        }
-    }
-    ASSERT_MSG(false, "No more available syncpoints!");
-    return {};
-}
-
-u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) {
-    for (u32 index = 0; index < value; ++index) {
-        syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed);
-    }
-
-    return GetSyncpointMax(syncpoint_id);
-}
-
-} // namespace Service::Nvidia
--- a/src/core/hle/service/nvdrv/syncpoint_manager.h
+++ b/src/core/hle/service/nvdrv/syncpoint_manager.h
@@ -1,84 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include <array>
-#include <atomic>
-
-#include "common/common_types.h"
-#include "core/hle/service/nvdrv/nvdata.h"
-
-namespace Tegra {
-class GPU;
-}
-
-namespace Service::Nvidia {
-
-class SyncpointManager final {
-public:
-    explicit SyncpointManager(Tegra::GPU& gpu_);
-    ~SyncpointManager();
-
-    /**
-     * Returns true if the specified syncpoint is expired for the given value.
-     * @param syncpoint_id Syncpoint ID to check.
-     * @param value Value to check against the specified syncpoint.
-     * @returns True if the specified syncpoint is expired for the given value, otherwise False.
-     */
-    bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const {
-        return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value);
-    }
-
-    /**
-     * Gets the lower bound for the specified syncpoint.
-     * @param syncpoint_id Syncpoint ID to get the lower bound for.
-     * @returns The lower bound for the specified syncpoint.
-     */
-    u32 GetSyncpointMin(u32 syncpoint_id) const {
-        return syncpoints.at(syncpoint_id).min.load(std::memory_order_relaxed);
-    }
-
-    /**
-     * Gets the uper bound for the specified syncpoint.
-     * @param syncpoint_id Syncpoint ID to get the upper bound for.
-     * @returns The upper bound for the specified syncpoint.
-     */
-    u32 GetSyncpointMax(u32 syncpoint_id) const {
-        return syncpoints.at(syncpoint_id).max.load(std::memory_order_relaxed);
-    }
-
-    /**
-     * Refreshes the minimum value for the specified syncpoint.
-     * @param syncpoint_id Syncpoint ID to be refreshed.
-     * @returns The new syncpoint minimum value.
-     */
-    u32 RefreshSyncpoint(u32 syncpoint_id);
-
-    /**
-     * Allocates a new syncoint.
-     * @returns The syncpoint ID for the newly allocated syncpoint.
-     */
-    u32 AllocateSyncpoint();
-
-    /**
-     * Increases the maximum value for the specified syncpoint.
-     * @param syncpoint_id Syncpoint ID to be increased.
-     * @param value Value to increase the specified syncpoint by.
-     * @returns The new syncpoint maximum value.
-     */
-    u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value);
-
-private:
-    struct Syncpoint {
-        std::atomic<u32> min;
-        std::atomic<u32> max;
-        std::atomic<bool> is_allocated;
-    };
-
-    std::array<Syncpoint, MaxSyncPoints> syncpoints{};
-
-    Tegra::GPU& gpu;
-};
-
-} // namespace Service::Nvidia
--- a/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue_consumer.cpp
@@ -5,15 +5,18 @@
 // https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/gui/BufferQueueConsumer.cpp

 #include "common/logging/log.h"
+#include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvflinger/buffer_item.h"
 #include "core/hle/service/nvflinger/buffer_queue_consumer.h"
 #include "core/hle/service/nvflinger/buffer_queue_core.h"
 #include "core/hle/service/nvflinger/producer_listener.h"
+#include "core/hle/service/nvflinger/ui/graphic_buffer.h"

 namespace Service::android {

-BufferQueueConsumer::BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_)
-    : core{std::move(core_)}, slots{core->slots} {}
+BufferQueueConsumer::BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_,
+                                         Service::Nvidia::NvCore::NvMap& nvmap_)
+    : core{std::move(core_)}, slots{core->slots}, nvmap(nvmap_) {}

 BufferQueueConsumer::~BufferQueueConsumer() = default;

@@ -133,6 +136,8 @@ Status BufferQueueConsumer::ReleaseBuffer(s32 slot, u64 frame_number, const Fenc

        slots[slot].buffer_state = BufferState::Free;

+        nvmap.FreeHandle(slots[slot].graphic_buffer->BufferId(), true);
+
        listener = core->connected_producer_listener;

        LOG_DEBUG(Service_NVFlinger, "releasing slot {}", slot);
--- a/src/core/hle/service/nvflinger/buffer_queue_consumer.h
+++ b/src/core/hle/service/nvflinger/buffer_queue_consumer.h
@@ -13,6 +13,10 @@
 #include "core/hle/service/nvflinger/buffer_queue_defs.h"
 #include "core/hle/service/nvflinger/status.h"

+namespace Service::Nvidia::NvCore {
+class NvMap;
+} // namespace Service::Nvidia::NvCore
+
 namespace Service::android {

 class BufferItem;
@@ -21,7 +25,8 @@ class IConsumerListener;

 class BufferQueueConsumer final {
 public:
-    explicit BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_);
+    explicit BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_,
+                                 Service::Nvidia::NvCore::NvMap& nvmap_);
    ~BufferQueueConsumer();

    Status AcquireBuffer(BufferItem* out_buffer, std::chrono::nanoseconds expected_present);
@@ -32,6 +37,7 @@ public:
 private:
    std::shared_ptr<BufferQueueCore> core;
    BufferQueueDefs::SlotsType& slots;
+    Service::Nvidia::NvCore::NvMap& nvmap;
 };

 } // namespace Service::android
--- a/src/core/hle/service/nvflinger/buffer_queue_producer.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue_producer.cpp
@@ -14,7 +14,7 @@
 #include "core/hle/kernel/k_writable_event.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/service/kernel_helpers.h"
-#include "core/hle/service/nvdrv/nvdrv.h"
+#include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvflinger/buffer_queue_core.h"
 #include "core/hle/service/nvflinger/buffer_queue_producer.h"
 #include "core/hle/service/nvflinger/consumer_listener.h"
@@ -26,8 +26,10 @@
 namespace Service::android {

 BufferQueueProducer::BufferQueueProducer(Service::KernelHelpers::ServiceContext& service_context_,
-                                         std::shared_ptr<BufferQueueCore> buffer_queue_core_)
-    : service_context{service_context_}, core{std::move(buffer_queue_core_)}, slots(core->slots) {
+                                         std::shared_ptr<BufferQueueCore> buffer_queue_core_,
+                                         Service::Nvidia::NvCore::NvMap& nvmap_)
+    : service_context{service_context_}, core{std::move(buffer_queue_core_)}, slots(core->slots),
+      nvmap(nvmap_) {
    buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent");
 }

@@ -530,6 +532,8 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
        item.is_droppable = core->dequeue_buffer_cannot_block || async;
        item.swap_interval = swap_interval;

+        nvmap.DuplicateHandle(item.graphic_buffer->BufferId(), true);
+
        sticky_transform = sticky_transform_;

        if (core->queue.empty()) {
--- a/src/core/hle/service/nvflinger/buffer_queue_producer.h
+++ b/src/core/hle/service/nvflinger/buffer_queue_producer.h
@@ -31,6 +31,10 @@ namespace Service::KernelHelpers {
 class ServiceContext;
 } // namespace Service::KernelHelpers

+namespace Service::Nvidia::NvCore {
+class NvMap;
+} // namespace Service::Nvidia::NvCore
+
 namespace Service::android {

 class BufferQueueCore;
@@ -39,7 +43,8 @@ class IProducerListener;
 class BufferQueueProducer final : public IBinder {
 public:
    explicit BufferQueueProducer(Service::KernelHelpers::ServiceContext& service_context_,
-                                 std::shared_ptr<BufferQueueCore> buffer_queue_core_);
+                                 std::shared_ptr<BufferQueueCore> buffer_queue_core_,
+                                 Service::Nvidia::NvCore::NvMap& nvmap_);
    ~BufferQueueProducer();

    void Transact(Kernel::HLERequestContext& ctx, android::TransactionId code, u32 flags) override;
@@ -78,6 +83,8 @@ private:
    s32 next_callback_ticket{};
    s32 current_callback_ticket{};
    std::condition_variable_any callback_condition;
+
+    Service::Nvidia::NvCore::NvMap& nvmap;
 };

 } // namespace Service::android
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -24,6 +24,8 @@
 #include "core/hle/service/vi/layer/vi_layer.h"
 #include "core/hle/service/vi/vi_results.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/syncpoint_manager.h"

 namespace Service::NVFlinger {

@@ -31,7 +33,7 @@ constexpr auto frame_ns = std::chrono::nanoseconds{1000000000 / 60};

 void NVFlinger::SplitVSync(std::stop_token stop_token) {
    system.RegisterHostThread();
-    std::string name = "yuzu:VSyncThread";
+    std::string name = "VSyncThread";
    MicroProfileOnThreadCreate(name.c_str());

    // Cleanup
@@ -105,10 +107,15 @@ NVFlinger::~NVFlinger() {
            display.GetLayer(layer).Core().NotifyShutdown();
        }
    }
+
+    if (nvdrv) {
+        nvdrv->Close(disp_fd);
+    }
 }

 void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
    nvdrv = std::move(instance);
+    disp_fd = nvdrv->Open("/dev/nvdisp_disp0");
 }

 std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
@@ -142,7 +149,7 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {

 void NVFlinger::CreateLayerAtId(VI::Display& display, u64 layer_id) {
    const auto buffer_id = next_buffer_queue_id++;
-    display.CreateLayer(layer_id, buffer_id);
+    display.CreateLayer(layer_id, buffer_id, nvdrv->container);
 }

 void NVFlinger::CloseLayer(u64 layer_id) {
@@ -262,30 +269,24 @@ void NVFlinger::Compose() {
            return; // We are likely shutting down
        }

-        auto& gpu = system.GPU();
-        const auto& multi_fence = buffer.fence;
-        guard->unlock();
-        for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
-            const auto& fence = multi_fence.fences[fence_id];
-            gpu.WaitFence(fence.id, fence.value);
-        }
-        guard->lock();
-
-        MicroProfileFlip();
-
        // Now send the buffer to the GPU for drawing.
        // TODO(Subv): Support more than just disp0. The display device selection is probably based
        // on which display we're drawing (Default, Internal, External, etc)
-        auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");
+        auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd);
        ASSERT(nvdisp);

+        guard->unlock();
        Common::Rectangle<int> crop_rect{
            static_cast<int>(buffer.crop.Left()), static_cast<int>(buffer.crop.Top()),
            static_cast<int>(buffer.crop.Right()), static_cast<int>(buffer.crop.Bottom())};

        nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(),
                     igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(),
-                     static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect);
+                     static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect,
+                     buffer.fence.fences, buffer.fence.num_fences);
+
+        MicroProfileFlip();
+        guard->lock();

        swap_interval = buffer.swap_interval;

--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -116,6 +116,7 @@ private:
    void SplitVSync(std::stop_token stop_token);

    std::shared_ptr<Nvidia::Module> nvdrv;
+    s32 disp_fd;

    std::list<VI::Display> displays;

--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -12,6 +12,7 @@
 #include "core/hle/kernel/k_readable_event.h"
 #include "core/hle/kernel/k_writable_event.h"
 #include "core/hle/service/kernel_helpers.h"
+#include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvflinger/buffer_item_consumer.h"
 #include "core/hle/service/nvflinger/buffer_queue_consumer.h"
 #include "core/hle/service/nvflinger/buffer_queue_core.h"
@@ -29,11 +30,13 @@ struct BufferQueue {
    std::unique_ptr<android::BufferQueueConsumer> consumer;
 };

-static BufferQueue CreateBufferQueue(KernelHelpers::ServiceContext& service_context) {
+static BufferQueue CreateBufferQueue(KernelHelpers::ServiceContext& service_context,
+                                     Service::Nvidia::NvCore::NvMap& nvmap) {
    auto buffer_queue_core = std::make_shared<android::BufferQueueCore>();
-    return {buffer_queue_core,
-            std::make_unique<android::BufferQueueProducer>(service_context, buffer_queue_core),
-            std::make_unique<android::BufferQueueConsumer>(buffer_queue_core)};
+    return {
+        buffer_queue_core,
+        std::make_unique<android::BufferQueueProducer>(service_context, buffer_queue_core, nvmap),
+        std::make_unique<android::BufferQueueConsumer>(buffer_queue_core, nvmap)};
 }

 Display::Display(u64 id, std::string name_,
@@ -74,10 +77,11 @@ void Display::SignalVSyncEvent() {
    vsync_event->GetWritableEvent().Signal();
 }

-void Display::CreateLayer(u64 layer_id, u32 binder_id) {
+void Display::CreateLayer(u64 layer_id, u32 binder_id,
+                          Service::Nvidia::NvCore::Container& nv_core) {
    ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");

-    auto [core, producer, consumer] = CreateBufferQueue(service_context);
+    auto [core, producer, consumer] = CreateBufferQueue(service_context, nv_core.GetNvMapFile());

    auto buffer_item_consumer = std::make_shared<android::BufferItemConsumer>(std::move(consumer));
    buffer_item_consumer->Connect(false);
--- a/src/core/hle/service/vi/display/vi_display.h
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -27,6 +27,11 @@ namespace Service::NVFlinger {
 class HosBinderDriverServer;
 }

+namespace Service::Nvidia::NvCore {
+class Container;
+class NvMap;
+} // namespace Service::Nvidia::NvCore
+
 namespace Service::VI {

 class Layer;
@@ -93,7 +98,7 @@ public:
    /// @param layer_id The ID to assign to the created layer.
    /// @param binder_id The ID assigned to the buffer queue.
    ///
-    void CreateLayer(u64 layer_id, u32 binder_id);
+    void CreateLayer(u64 layer_id, u32 binder_id, Service::Nvidia::NvCore::Container& core);

    /// Closes and removes a layer from this display with the given ID.
    ///
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -58,6 +58,7 @@ static_assert(sizeof(DisplayInfo) == 0x60, "DisplayInfo has wrong size");
 class NativeWindow final {
 public:
    constexpr explicit NativeWindow(u32 id_) : id{id_} {}
+    constexpr explicit NativeWindow(const NativeWindow& other) = default;

 private:
    const u32 magic = 2;
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -551,6 +551,11 @@ struct Memory::Impl {
            []() {});
    }

+    [[nodiscard]] u8* GetPointerSilent(const VAddr vaddr) const {
+        return GetPointerImpl(
+            vaddr, []() {}, []() {}); // both callback arguments are empty lambdas
+    }
+
    /**
     * Reads a particular data type out of memory at the given virtual address.
     *
@@ -686,6 +691,10 @@ u8* Memory::GetPointer(VAddr vaddr) {
    return impl->GetPointer(vaddr);
 }

+u8* Memory::GetPointerSilent(VAddr vaddr) {
+    return impl->GetPointerSilent(vaddr); // forwards to impl's GetPointerSilent
+}
+
 const u8* Memory::GetPointer(VAddr vaddr) const {
    return impl->GetPointer(vaddr);
 }
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -114,6 +114,7 @@ public:
     *          If the address is not valid, nullptr will be returned.
     */
    u8* GetPointer(VAddr vaddr);
+    u8* GetPointerSilent(VAddr vaddr);

    template <typename T>
    T* GetPointer(VAddr vaddr) {
--- a/src/dedicated_room/CMakeLists.txt
+++ b/src/dedicated_room/CMakeLists.txt
@@ -23,5 +23,5 @@ endif()
 target_link_libraries(yuzu-room PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads)

 if(UNIX AND NOT APPLE)
-    install(TARGETS yuzu-room RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
+    install(TARGETS yuzu-room)
 endif()
--- a/src/input_common/drivers/gc_adapter.cpp
+++ b/src/input_common/drivers/gc_adapter.cpp
@@ -90,7 +90,7 @@ GCAdapter::~GCAdapter() {

 void GCAdapter::AdapterInputThread(std::stop_token stop_token) {
    LOG_DEBUG(Input, "Input thread started");
-    Common::SetCurrentThreadName("yuzu:input:GCAdapter");
+    Common::SetCurrentThreadName("GCAdapter");
    s32 payload_size{};
    AdapterPayload adapter_payload{};

@@ -214,7 +214,7 @@ void GCAdapter::UpdateStateAxes(std::size_t port, const AdapterPayload& adapter_
 }

 void GCAdapter::AdapterScanThread(std::stop_token stop_token) {
-    Common::SetCurrentThreadName("yuzu:input:ScanGCAdapter");
+    Common::SetCurrentThreadName("ScanGCAdapter");
    usb_adapter_handle = nullptr;
    pads = {};
    while (!stop_token.stop_requested() && !Setup()) {
--- a/src/input_common/drivers/mouse.cpp
+++ b/src/input_common/drivers/mouse.cpp
@@ -37,7 +37,7 @@ Mouse::Mouse(std::string input_engine_) : InputEngine(std::move(input_engine_))
 }

 void Mouse::UpdateThread(std::stop_token stop_token) {
-    Common::SetCurrentThreadName("yuzu:input:Mouse");
+    Common::SetCurrentThreadName("Mouse");
    constexpr int update_time = 10;
    while (!stop_token.stop_requested()) {
        if (Settings::values.mouse_panning && !Settings::values.mouse_enabled) {
--- a/src/input_common/drivers/sdl_driver.cpp
+++ b/src/input_common/drivers/sdl_driver.cpp
@@ -436,7 +436,7 @@ SDLDriver::SDLDriver(std::string input_engine_) : InputEngine(std::move(input_en
    initialized = true;
    if (start_thread) {
        poll_thread = std::thread([this] {
-            Common::SetCurrentThreadName("yuzu:input:SDL");
+            Common::SetCurrentThreadName("SDL_MainLoop");
            using namespace std::chrono_literals;
            while (initialized) {
                SDL_PumpEvents();
@@ -444,7 +444,7 @@ SDLDriver::SDLDriver(std::string input_engine_) : InputEngine(std::move(input_en
            }
        });
        vibration_thread = std::thread([this] {
-            Common::SetCurrentThreadName("yuzu:input:SDL_Vibration");
+            Common::SetCurrentThreadName("SDL_Vibration");
            using namespace std::chrono_literals;
            while (initialized) {
                SendVibrations();
--- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -13,9 +13,6 @@ namespace Shader::Backend::GLASM {
 namespace {
 void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
             std::string_view size) {
-    if (!binding.IsImmediate()) {
-        throw NotImplementedException("Indirect constant buffer loading");
-    }
    const Register ret{ctx.reg_alloc.Define(inst)};
    if (offset.type == Type::U32) {
        // Avoid reading arrays out of bounds, matching hardware's behavior
@@ -24,7 +21,27 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU
            return;
        }
    }
-    ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset);
+
+    if (binding.IsImmediate()) {
+        ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset);
+        return;
+    }
+
+    const ScalarU32 idx{ctx.reg_alloc.Consume(binding)};
+    for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
+        ctx.Add("SEQ.S.CC RC.x,{},{};"
+                "IF NE.x;"
+                "LDC.{} {},c{}[{}];",
+                idx, i, size, ret, i, offset);
+
+        if (i != Info::MAX_INDIRECT_CBUFS - 1) {
+            ctx.Add("ELSE;");
+        }
+    }
+
+    for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
+        ctx.Add("ENDIF;");
+    }
 }

 bool IsInputArray(Stage stage) {
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -964,9 +964,9 @@ private:
        demote_endif_node.type = Type::EndIf;
        demote_endif_node.data.end_if.merge = return_block_it->data.block;

-        asl.insert(return_block_it, demote_endif_node);
-        asl.insert(return_block_it, demote_node);
-        asl.insert(return_block_it, demote_if_node);
+        const auto next_it_1 = asl.insert(return_block_it, demote_endif_node);
+        const auto next_it_2 = asl.insert(next_it_1, demote_node);
+        asl.insert(next_it_2, demote_if_node);
    }

    ObjectPool<Statement>& stmt_pool;
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -19,8 +19,10 @@ namespace {
 struct ConstBufferAddr {
    u32 index;
    u32 offset;
+    u32 shift_left;
    u32 secondary_index;
    u32 secondary_offset;
+    u32 secondary_shift_left;
    IR::U32 dynamic_offset;
    u32 count;
    bool has_secondary;
@@ -172,19 +174,41 @@ bool IsTextureInstruction(const IR::Inst& inst) {
    return IndexedInstruction(inst) != IR::Opcode::Void;
 }

-std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst);
+std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env);

-std::optional<ConstBufferAddr> Track(const IR::Value& value) {
-    return IR::BreadthFirstSearch(value, TryGetConstBuffer);
+std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env) {
+    return IR::BreadthFirstSearch(
+        value, [&env](const IR::Inst* inst) { return TryGetConstBuffer(inst, env); });
 }

-std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
+// Reads the value behind a GetCbufU32 whose buffer index and offset are both immediates.
+// Yields std::nullopt for any other opcode, or when the buffer index is not 1.
+std::optional<u32> TryGetConstant(IR::Value& value, Environment& env) {
+    const IR::Inst* const cbuf_load{value.InstRecursive()};
+    if (cbuf_load->GetOpcode() != IR::Opcode::GetCbufU32) {
+        return std::nullopt;
+    }
+    const IR::Value cbuf_index{cbuf_load->Arg(0)};
+    const IR::Value cbuf_offset{cbuf_load->Arg(1)};
+    // Non-immediate operands cannot be resolved here.
+    if (!cbuf_index.IsImmediate() || !cbuf_offset.IsImmediate()) {
+        return std::nullopt;
+    }
+    // Only constant buffer index 1 is accepted.
+    const auto index_number = cbuf_index.U32();
+    if (index_number != 1) {
+        return std::nullopt;
+    }
+    return env.ReadCbufValue(index_number, cbuf_offset.U32());
+}
+
+std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env) {
    switch (inst->GetOpcode()) {
    default:
        return std::nullopt;
    case IR::Opcode::BitwiseOr32: {
-        std::optional lhs{Track(inst->Arg(0))};
-        std::optional rhs{Track(inst->Arg(1))};
+        std::optional lhs{Track(inst->Arg(0), env)};
+        std::optional rhs{Track(inst->Arg(1), env)};
        if (!lhs || !rhs) {
            return std::nullopt;
        }
@@ -194,19 +218,62 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
        if (lhs->count > 1 || rhs->count > 1) {
            return std::nullopt;
        }
-        if (lhs->index > rhs->index || lhs->offset > rhs->offset) {
+        if (lhs->shift_left > 0 || lhs->index > rhs->index || lhs->offset > rhs->offset) {
            std::swap(lhs, rhs);
        }
        return ConstBufferAddr{
            .index = lhs->index,
            .offset = lhs->offset,
+            .shift_left = lhs->shift_left,
            .secondary_index = rhs->index,
            .secondary_offset = rhs->offset,
+            .secondary_shift_left = rhs->shift_left,
            .dynamic_offset = {},
            .count = 1,
            .has_secondary = true,
        };
    }
+    case IR::Opcode::ShiftLeftLogical32: {
+        // Only an immediate shift amount can be recorded in shift_left.
+        const IR::Value shift{inst->Arg(1)};
+        if (!shift.IsImmediate()) {
+            return std::nullopt;
+        }
+        std::optional lhs{Track(inst->Arg(0), env)};
+        if (lhs) {
+            lhs->shift_left = shift.U32();
+        }
+        return lhs;
+    }
+    case IR::Opcode::BitwiseAnd32: {
+        // Tracks an AND whose mask is an immediate or a value TryGetConstant can read.
+        // After the swap below, an immediate operand (if there is one) sits in op2 and
+        // op1 is the operand handed to Track.
+        IR::Value op1{inst->Arg(0)};
+        IR::Value op2{inst->Arg(1)};
+        if (op1.IsImmediate()) {
+            std::swap(op1, op2);
+        }
+        if (!op2.IsImmediate() && !op1.IsImmediate()) {
+            // Neither operand is an immediate: use whichever one TryGetConstant can
+            // read as the mask and track the other one.
+            if (const auto try_index = TryGetConstant(op1, env)) {
+                op1 = op2;
+                op2 = IR::Value{*try_index};
+            } else if (const auto try_index_2 = TryGetConstant(op2, env)) {
+                op2 = IR::Value{*try_index_2};
+            } else {
+                return std::nullopt;
+            }
+        }
+        std::optional lhs{Track(op1, env)};
+        if (lhs) {
+            // Store the count of trailing zero bits of the mask as the left shift;
+            // std::countr_zero returns 32 for a zero mask.
+            lhs->shift_left = static_cast<u32>(std::countr_zero(op2.U32()));
+        }
+        return lhs;
+    }
    case IR::Opcode::GetCbufU32x2:
    case IR::Opcode::GetCbufU32:
        break;
@@ -222,8 +289,10 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
        return ConstBufferAddr{
            .index = index.U32(),
            .offset = offset.U32(),
+            .shift_left = 0,
            .secondary_index = 0,
            .secondary_offset = 0,
+            .secondary_shift_left = 0,
            .dynamic_offset = {},
            .count = 1,
            .has_secondary = false,
@@ -247,8 +316,10 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
    return ConstBufferAddr{
        .index = index.U32(),
        .offset = base_offset,
+        .shift_left = 0,
        .secondary_index = 0,
        .secondary_offset = 0,
+        .secondary_shift_left = 0,
        .dynamic_offset = dynamic_offset,
        .count = 8,
        .has_secondary = false,
@@ -258,7 +329,7 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
 TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
    ConstBufferAddr addr;
    if (IsBindless(inst)) {
-        const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0))};
+        const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0), env)};
        if (!track_addr) {
            throw NotImplementedException("Failed to track bindless texture constant buffer");
        }
@@ -267,8 +338,10 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
        addr = ConstBufferAddr{
            .index = env.TextureBoundBuffer(),
            .offset = inst.Arg(0).U32(),
+            .shift_left = 0,
            .secondary_index = 0,
            .secondary_offset = 0,
+            .secondary_shift_left = 0,
            .dynamic_offset = {},
            .count = 1,
            .has_secondary = false,
@@ -284,8 +357,9 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
 TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
    const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
    const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
-    const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)};
-    const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)};
+    const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset) << cbuf.shift_left};
+    const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)
+                      << cbuf.secondary_shift_left};
    return env.ReadTextureType(lhs_raw | rhs_raw);
 }

@@ -487,8 +561,10 @@ void TexturePass(Environment& env, IR::Program& program) {
                    .has_secondary = cbuf.has_secondary,
                    .cbuf_index = cbuf.index,
                    .cbuf_offset = cbuf.offset,
+                    .shift_left = cbuf.shift_left,
                    .secondary_cbuf_index = cbuf.secondary_index,
                    .secondary_cbuf_offset = cbuf.secondary_offset,
+                    .secondary_shift_left = cbuf.secondary_shift_left,
                    .count = cbuf.count,
                    .size_shift = DESCRIPTOR_SIZE_SHIFT,
                });
@@ -499,8 +575,10 @@ void TexturePass(Environment& env, IR::Program& program) {
                    .has_secondary = cbuf.has_secondary,
                    .cbuf_index = cbuf.index,
                    .cbuf_offset = cbuf.offset,
+                    .shift_left = cbuf.shift_left,
                    .secondary_cbuf_index = cbuf.secondary_index,
                    .secondary_cbuf_offset = cbuf.secondary_offset,
+                    .secondary_shift_left = cbuf.secondary_shift_left,
                    .count = cbuf.count,
                    .size_shift = DESCRIPTOR_SIZE_SHIFT,
                });
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -61,8 +61,10 @@ struct TextureBufferDescriptor {
    bool has_secondary;
    u32 cbuf_index;
    u32 cbuf_offset;
+    u32 shift_left;
    u32 secondary_cbuf_index;
    u32 secondary_cbuf_offset;
+    u32 secondary_shift_left;
    u32 count;
    u32 size_shift;
 };
@@ -85,8 +87,10 @@ struct TextureDescriptor {
    bool has_secondary;
    u32 cbuf_index;
    u32 cbuf_offset;
+    u32 shift_left;
    u32 secondary_cbuf_index;
    u32 secondary_cbuf_offset;
+    u32 secondary_shift_left;
    u32 count;
    u32 size_shift;
 };
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -4,7 +4,7 @@
 add_subdirectory(host_shaders)

 if(LIBVA_FOUND)
-    set_source_files_properties(command_classes/codecs/codec.cpp
+    set_source_files_properties(host1x/codecs/codec.cpp
        PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
    list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})
 endif()
@@ -15,26 +15,14 @@ add_library(video_core STATIC
    buffer_cache/buffer_cache.h
    cdma_pusher.cpp
    cdma_pusher.h
-    command_classes/codecs/codec.cpp
-    command_classes/codecs/codec.h
-    command_classes/codecs/h264.cpp
-    command_classes/codecs/h264.h
-    command_classes/codecs/vp8.cpp
-    command_classes/codecs/vp8.h
-    command_classes/codecs/vp9.cpp
-    command_classes/codecs/vp9.h
-    command_classes/codecs/vp9_types.h
-    command_classes/host1x.cpp
-    command_classes/host1x.h
-    command_classes/nvdec.cpp
-    command_classes/nvdec.h
-    command_classes/nvdec_common.h
-    command_classes/sync_manager.cpp
-    command_classes/sync_manager.h
-    command_classes/vic.cpp
-    command_classes/vic.h
    compatible_formats.cpp
    compatible_formats.h
+    control/channel_state.cpp
+    control/channel_state.h
+    control/channel_state_cache.cpp
+    control/channel_state_cache.h
+    control/scheduler.cpp
+    control/scheduler.h
    delayed_destruction_ring.h
    dirty_flags.cpp
    dirty_flags.h
@@ -54,7 +42,31 @@ add_library(video_core STATIC
    engines/maxwell_3d.h
    engines/maxwell_dma.cpp
    engines/maxwell_dma.h
+    engines/puller.cpp
+    engines/puller.h
    framebuffer_config.h
+    host1x/codecs/codec.cpp
+    host1x/codecs/codec.h
+    host1x/codecs/h264.cpp
+    host1x/codecs/h264.h
+    host1x/codecs/vp8.cpp
+    host1x/codecs/vp8.h
+    host1x/codecs/vp9.cpp
+    host1x/codecs/vp9.h
+    host1x/codecs/vp9_types.h
+    host1x/control.cpp
+    host1x/control.h
+    host1x/host1x.cpp
+    host1x/host1x.h
+    host1x/nvdec.cpp
+    host1x/nvdec.h
+    host1x/nvdec_common.h
+    host1x/sync_manager.cpp
+    host1x/sync_manager.h
+    host1x/syncpoint_manager.cpp
+    host1x/syncpoint_manager.h
+    host1x/vic.cpp
+    host1x/vic.h
    macro/macro.cpp
    macro/macro.h
    macro/macro_hle.cpp
@@ -195,6 +207,7 @@ add_library(video_core STATIC
    texture_cache/render_targets.h
    texture_cache/samples_helper.h
    texture_cache/slot_vector.h
+    texture_cache/texture_cache.cpp
    texture_cache/texture_cache.h
    texture_cache/texture_cache_base.h
    texture_cache/types.h
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -5,7 +5,6 @@

 #include <algorithm>
 #include <array>
-#include <deque>
 #include <memory>
 #include <mutex>
 #include <numeric>
@@ -23,6 +22,7 @@
 #include "common/settings.h"
 #include "core/memory.h"
 #include "video_core/buffer_cache/buffer_base.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
@@ -56,7 +56,7 @@ using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFE
 using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;

 template <typename P>
-class BufferCache {
+class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {

    // Page size for caching purposes.
    // This is unrelated to the CPU page size and it can be changed as it seems optimal.
@@ -116,10 +116,7 @@ public:
    static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);

    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                         Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_,
-                         Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                         Runtime& runtime_);
+                         Core::Memory::Memory& cpu_memory_, Runtime& runtime_);

    void TickFrame();

@@ -129,7 +126,7 @@ public:

    void DownloadMemory(VAddr cpu_addr, u64 size);

-    bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer);
+    bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);

    void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);

@@ -353,7 +350,7 @@ private:

    void NotifyBufferDeletion();

-    [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
+    [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, bool is_written = false) const;

    [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
                                                               PixelFormat format);
@@ -367,9 +364,6 @@ private:
    void ClearDownload(IntervalType subtract_interval);

    VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
    Core::Memory::Memory& cpu_memory;

    SlotVector<Buffer> slot_buffers;
@@ -444,12 +438,8 @@ private:

 template <class P>
 BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                            Tegra::Engines::Maxwell3D& maxwell3d_,
-                            Tegra::Engines::KeplerCompute& kepler_compute_,
-                            Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                            Runtime& runtime_)
-    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
+                            Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
+    : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} {
    // Ensure the first slot is used for the null buffer
    void(slot_buffers.insert(runtime, NullBufferParams{}));
    common_ranges.clear();
@@ -552,8 +542,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {

 template <class P>
 bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
-    const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
-    const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
+    const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
+    const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
    if (!cpu_src_address || !cpu_dest_address) {
        return false;
    }
@@ -611,7 +601,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am

 template <class P>
 bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
-    const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
+    const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
    if (!cpu_dst_address) {
        return false;
    }
@@ -635,7 +625,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
 template <class P>
 void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                               u32 size) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    const Binding binding{
        .cpu_addr = *cpu_addr,
        .size = size,
@@ -673,7 +663,7 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
    if (is_indexed) {
        BindHostIndexBuffer();
    } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const auto& regs = maxwell3d.regs;
+        const auto& regs = maxwell3d->regs;
        if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
            runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
        }
@@ -733,9 +723,9 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
    enabled_storage_buffers[stage] |= 1U << ssbo_index;
    written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;

-    const auto& cbufs = maxwell3d.state.shader_stages[stage];
+    const auto& cbufs = maxwell3d->state.shader_stages[stage];
    const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
-    storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
+    storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
 }

 template <class P>
@@ -770,12 +760,12 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
    enabled_compute_storage_buffers |= 1U << ssbo_index;
    written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;

-    const auto& launch_desc = kepler_compute.launch_description;
+    const auto& launch_desc = kepler_compute->launch_description;
    ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);

    const auto& cbufs = launch_desc.const_buffer_config;
    const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
-    compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr);
+    compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
 }

 template <class P>
@@ -836,6 +826,19 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
    const bool is_accuracy_normal =
        Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;

+    auto it = committed_ranges.begin();
+    while (it != committed_ranges.end()) {
+        auto& current_intervals = *it;
+        auto next_it = std::next(it);
+        while (next_it != committed_ranges.end()) {
+            for (auto& interval : *next_it) {
+                current_intervals.subtract(interval);
+            }
+            next_it++;
+        }
+        it++;
+    }
+
    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
    u64 total_size_bytes = 0;
    u64 largest_copy = 0;
@@ -991,19 +994,19 @@ void BufferCache<P>::BindHostIndexBuffer() {
    const u32 size = index_buffer.size;
    SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
    if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const u32 new_offset = offset + maxwell3d.regs.index_array.first *
-                                            maxwell3d.regs.index_array.FormatSizeInBytes();
+        const u32 new_offset = offset + maxwell3d->regs.index_array.first *
+                                            maxwell3d->regs.index_array.FormatSizeInBytes();
        runtime.BindIndexBuffer(buffer, new_offset, size);
    } else {
-        runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
-                                maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
-                                buffer, offset, size);
+        runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_array.format,
+                                maxwell3d->regs.index_array.first,
+                                maxwell3d->regs.index_array.count, buffer, offset, size);
    }
 }

 template <class P>
 void BufferCache<P>::BindHostVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
        const Binding& binding = vertex_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -1014,7 +1017,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
        }
        flags[Dirty::VertexBuffer0 + index] = false;

-        const u32 stride = maxwell3d.regs.vertex_array[index].stride;
+        const u32 stride = maxwell3d->regs.vertex_array[index].stride;
        const u32 offset = buffer.Offset(binding.cpu_addr);
        runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
    }
@@ -1154,7 +1157,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {

 template <class P>
 void BufferCache<P>::BindHostTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
        return;
    }
    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1239,16 +1242,19 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {

 template <class P>
 void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
-    if (is_indexed) {
-        UpdateIndexBuffer();
-    }
-    UpdateVertexBuffers();
-    UpdateTransformFeedbackBuffers();
-    for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
-        UpdateUniformBuffers(stage);
-        UpdateStorageBuffers(stage);
-        UpdateTextureBuffers(stage);
-    }
+    do {
+        has_deleted_buffers = false;
+        if (is_indexed) {
+            UpdateIndexBuffer();
+        }
+        UpdateVertexBuffers();
+        UpdateTransformFeedbackBuffers();
+        for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+            UpdateUniformBuffers(stage);
+            UpdateStorageBuffers(stage);
+            UpdateTextureBuffers(stage);
+        }
+    } while (has_deleted_buffers);
 }

 template <class P>
@@ -1262,8 +1268,8 @@ template <class P>
 void BufferCache<P>::UpdateIndexBuffer() {
    // We have to check for the dirty flags and index count
    // The index count is currently changed without updating the dirty flags
-    const auto& index_array = maxwell3d.regs.index_array;
-    auto& flags = maxwell3d.dirty.flags;
+    const auto& index_array = maxwell3d->regs.index_array;
+    auto& flags = maxwell3d->dirty.flags;
    if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
        return;
    }
@@ -1272,7 +1278,7 @@ void BufferCache<P>::UpdateIndexBuffer() {

    const GPUVAddr gpu_addr_begin = index_array.StartAddress();
    const GPUVAddr gpu_addr_end = index_array.EndAddress();
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
    const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
    const u32 size = std::min(address_size, draw_size);
@@ -1289,8 +1295,8 @@ void BufferCache<P>::UpdateIndexBuffer() {

 template <class P>
 void BufferCache<P>::UpdateVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
+    auto& flags = maxwell3d->dirty.flags;
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffers]) {
        return;
    }
    flags[Dirty::VertexBuffers] = false;
@@ -1302,33 +1308,25 @@ void BufferCache<P>::UpdateVertexBuffers() {

 template <class P>
 void BufferCache<P>::UpdateVertexBuffer(u32 index) {
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffer0 + index]) {
        return;
    }
-    const auto& array = maxwell3d.regs.vertex_array[index];
-    const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+    const auto& array = maxwell3d->regs.vertex_array[index];
+    const auto& limit = maxwell3d->regs.vertex_array_limit[index];
    const GPUVAddr gpu_addr_begin = array.StartAddress();
    const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
-    u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
-    if (address_size >= 64_MiB) {
-        // Reported vertex buffer size is very large, cap to mapped buffer size
-        GPUVAddr submapped_addr_end = gpu_addr_begin;
-
-        const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
-        if (ranges.size() > 0) {
-            const auto& [addr, size] = *ranges.begin();
-            submapped_addr_end = addr + size;
-        }
-
-        address_size =
-            std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
-    }
-    const u32 size = address_size; // TODO: Analyze stride and number of vertices
-    if (array.enable == 0 || size == 0 || !cpu_addr) {
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
+    u32 address_size = static_cast<u32>(
+        std::min(gpu_addr_end - gpu_addr_begin, static_cast<u64>(std::numeric_limits<u32>::max())));
+    if (array.enable == 0 || address_size == 0 || !cpu_addr) {
        vertex_buffers[index] = NULL_BINDING;
        return;
    }
+    if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
+        address_size =
+            static_cast<u32>(gpu_memory->MaxContinousRange(gpu_addr_begin, address_size));
+    }
+    const u32 size = address_size; // TODO: Analyze stride and number of vertices
    vertex_buffers[index] = Binding{
        .cpu_addr = *cpu_addr,
        .size = size,
@@ -1382,7 +1380,7 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {

 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
        return;
    }
    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1392,10 +1390,10 @@ void BufferCache<P>::UpdateTransformFeedbackBuffers() {

 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
-    const auto& binding = maxwell3d.regs.tfb_bindings[index];
+    const auto& binding = maxwell3d->regs.tfb_bindings[index];
    const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
    const u32 size = binding.buffer_size;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
        transform_feedback_buffers[index] = NULL_BINDING;
        return;
@@ -1414,10 +1412,10 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
    ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
        Binding& binding = compute_uniform_buffers[index];
        binding = NULL_BINDING;
-        const auto& launch_desc = kepler_compute.launch_description;
+        const auto& launch_desc = kepler_compute->launch_description;
        if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
            const auto& cbuf = launch_desc.const_buffer_config[index];
-            const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+            const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
            if (cpu_addr) {
                binding.cpu_addr = *cpu_addr;
                binding.size = cbuf.size;
@@ -1567,6 +1565,8 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
    const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
    const u32 size = static_cast<u32>(overlap.end - overlap.begin);
    const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
+    auto& new_buffer = slot_buffers[new_buffer_id];
+    runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0);
    for (const BufferId overlap_id : overlap.ids) {
        JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
    }
@@ -1695,7 +1695,7 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,

 template <class P>
 bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
-                                  std::span<u8> inlined_buffer) {
+                                  std::span<const u8> inlined_buffer) {
    const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
    if (!is_dirty) {
        return false;
@@ -1831,7 +1831,7 @@ void BufferCache<P>::NotifyBufferDeletion() {
        dirty_uniform_buffers.fill(~u32{0});
        uniform_buffer_binding_sizes.fill({});
    }
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
    flags[Dirty::IndexBuffer] = true;
    flags[Dirty::VertexBuffers] = true;
    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
@@ -1841,16 +1841,18 @@ void BufferCache<P>::NotifyBufferDeletion() {
 }

 template <class P>
-typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
-    const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
-    const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr,
+                                                                      bool is_written) const {
+    const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
+    const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    if (!cpu_addr || size == 0) {
        return NULL_BINDING;
    }
+    const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
    const Binding binding{
        .cpu_addr = *cpu_addr,
-        .size = size,
+        .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
        .buffer_id = BufferId{},
    };
    return binding;
@@ -1859,7 +1861,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
 template <class P>
 typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
    GPUVAddr gpu_addr, u32 size, PixelFormat format) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    TextureBufferBinding binding;
    if (!cpu_addr || size == 0) {
        binding.cpu_addr = 0;
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -2,20 +2,22 @@
 // SPDX-License-Identifier: MIT

 #include <bit>
-#include "command_classes/host1x.h"
-#include "command_classes/nvdec.h"
-#include "command_classes/vic.h"
 #include "video_core/cdma_pusher.h"
-#include "video_core/command_classes/sync_manager.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/gpu.h"
+#include "video_core/host1x/control.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/nvdec.h"
+#include "video_core/host1x/nvdec_common.h"
+#include "video_core/host1x/sync_manager.h"
+#include "video_core/host1x/vic.h"
+#include "video_core/memory_manager.h"

 namespace Tegra {
-CDmaPusher::CDmaPusher(GPU& gpu_)
-    : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
-      vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
-      host1x_processor(std::make_unique<Host1x>(gpu)),
-      sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {}
+CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_)
+    : host1x{host1x_}, nvdec_processor(std::make_shared<Host1x::Nvdec>(host1x)),
+      vic_processor(std::make_unique<Host1x::Vic>(host1x, nvdec_processor)),
+      host1x_processor(std::make_unique<Host1x::Control>(host1x)),
+      sync_manager(std::make_unique<Host1x::SyncptIncrManager>(host1x)) {}

 CDmaPusher::~CDmaPusher() = default;

@@ -109,16 +111,17 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
        case ThiMethod::SetMethod1:
            LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
                      static_cast<u32>(vic_thi_state.method_0), data);
-            vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), data);
+            vic_processor->ProcessMethod(static_cast<Host1x::Vic::Method>(vic_thi_state.method_0),
+                                         data);
            break;
        default:
            break;
        }
        break;
-    case ChClassId::Host1x:
+    case ChClassId::Control:
        // This device is mainly for syncpoint synchronization
        LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
-        host1x_processor->ProcessMethod(static_cast<Host1x::Method>(offset), data);
+        host1x_processor->ProcessMethod(static_cast<Host1x::Control::Method>(offset), data);
        break;
    default:
        UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -12,11 +12,13 @@

 namespace Tegra {

-class GPU;
+namespace Host1x {
+class Control;
 class Host1x;
 class Nvdec;
 class SyncptIncrManager;
 class Vic;
+} // namespace Host1x

 enum class ChSubmissionMode : u32 {
    SetClass = 0,
@@ -30,7 +32,7 @@ enum class ChSubmissionMode : u32 {

 enum class ChClassId : u32 {
    NoClass = 0x0,
-    Host1x = 0x1,
+    Control = 0x1,
    VideoEncodeMpeg = 0x20,
    VideoEncodeNvEnc = 0x21,
    VideoStreamingVi = 0x30,
@@ -88,7 +90,7 @@ enum class ThiMethod : u32 {

 class CDmaPusher {
 public:
-    explicit CDmaPusher(GPU& gpu_);
+    explicit CDmaPusher(Host1x::Host1x& host1x);
    ~CDmaPusher();

    /// Process the command entry
@@ -101,11 +103,11 @@ private:
    /// Write arguments value to the ThiRegisters member at the specified offset
    void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument);

-    GPU& gpu;
-    std::shared_ptr<Tegra::Nvdec> nvdec_processor;
-    std::unique_ptr<Tegra::Vic> vic_processor;
-    std::unique_ptr<Tegra::Host1x> host1x_processor;
-    std::unique_ptr<SyncptIncrManager> sync_manager;
+    Host1x::Host1x& host1x;
+    std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
+    std::unique_ptr<Tegra::Host1x::Vic> vic_processor;
+    std::unique_ptr<Tegra::Host1x::Control> host1x_processor;
+    std::unique_ptr<Host1x::SyncptIncrManager> sync_manager;
    ChClassId current_class{};
    ThiRegisters vic_thi_state{};
    ThiRegisters nvdec_thi_state{};
--- a/src/video_core/command_classes/host1x.cpp
+++ b/src/video_core/command_classes/host1x.cpp
@@ -1,29 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include "common/assert.h"
-#include "video_core/command_classes/host1x.h"
-#include "video_core/gpu.h"
-
-Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}
-
-Tegra::Host1x::~Host1x() = default;
-
-void Tegra::Host1x::ProcessMethod(Method method, u32 argument) {
-    switch (method) {
-    case Method::LoadSyncptPayload32:
-        syncpoint_value = argument;
-        break;
-    case Method::WaitSyncpt:
-    case Method::WaitSyncpt32:
-        Execute(argument);
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method));
-        break;
-    }
-}
-
-void Tegra::Host1x::Execute(u32 data) {
-    gpu.WaitFence(data, syncpoint_value);
-}
--- a/src/video_core/control/channel_state.cpp
+++ b/src/video_core/control/channel_state.cpp
@@ -0,0 +1,40 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common/assert.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/engines/puller.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Control {
+
+// Stores the bind id of the channel; the channel starts out not initialized.
+ChannelState::ChannelState(s32 bind_id_)
+    : bind_id{bind_id_},
+      initialized{false} {}
+
+/// Creates the DMA pusher and all engines of this channel.
+/// A memory manager must already be assigned to the channel: it is asserted here and then
+/// handed by reference to the pusher and to every engine except Fermi2D.
+void ChannelState::Init(Core::System& system, GPU& gpu) {
+    ASSERT(memory_manager);
+    dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
+    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
+    fermi_2d = std::make_unique<Engines::Fermi2D>();
+    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
+    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
+    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
+    // Flag the channel as ready only after every component has been created.
+    initialized = true;
+}
+
+/// Forwards `rasterizer` to the DMA pusher, the memory manager and every engine.
+/// All of these pointers are dereferenced without a null check, so the channel must have a
+/// memory manager and Init() must have run before this is called.
+void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
+    dma_pusher->BindRasterizer(rasterizer);
+    memory_manager->BindRasterizer(rasterizer);
+    maxwell_3d->BindRasterizer(rasterizer);
+    fermi_2d->BindRasterizer(rasterizer);
+    kepler_memory->BindRasterizer(rasterizer);
+    kepler_compute->BindRasterizer(rasterizer);
+    maxwell_dma->BindRasterizer(rasterizer);
+}
+
+} // namespace Tegra::Control
--- a/src/video_core/control/channel_state.h
+++ b/src/video_core/control/channel_state.h
@@ -0,0 +1,68 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+
+namespace Core {
+class System;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra {
+
+class GPU;
+
+namespace Engines {
+class Puller;
+class Fermi2D;
+class Maxwell3D;
+class MaxwellDMA;
+class KeplerCompute;
+class KeplerMemory;
+} // namespace Engines
+
+class MemoryManager;
+class DmaPusher;
+
+namespace Control {
+
+/// State belonging to one GPU channel: its engines, its DMA pusher and the memory manager
+/// they operate on. The type is move-only (copying is deleted).
+struct ChannelState {
+    explicit ChannelState(s32 bind_id);
+    ChannelState(const ChannelState& state) = delete;
+    ChannelState& operator=(const ChannelState&) = delete;
+    ChannelState(ChannelState&& other) noexcept = default;
+    ChannelState& operator=(ChannelState&& other) noexcept = default;
+
+    /// Creates the DMA pusher and the engines; memory_manager must be set beforehand.
+    void Init(Core::System& system, GPU& gpu);
+
+    /// Passes the rasterizer on to the DMA pusher, the memory manager and every engine.
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+    /// Id this channel is bound under (assigned by the constructor).
+    s32 bind_id = -1;
+    /// 3D engine
+    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
+    /// 2D engine
+    std::unique_ptr<Engines::Fermi2D> fermi_2d;
+    /// Compute engine
+    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
+    /// DMA engine
+    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
+    /// Inline memory engine
+    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+
+    /// Memory manager used by this channel; held through a shared_ptr.
+    std::shared_ptr<MemoryManager> memory_manager;
+
+    /// Command pusher of this channel, created by Init().
+    std::unique_ptr<DmaPusher> dma_pusher;
+
+    /// Set to true at the end of Init().
+    bool initialized{};
+};
+
+} // namespace Control
+
+} // namespace Tegra
--- a/src/video_core/control/channel_state_cache.cpp
+++ b/src/video_core/control/channel_state_cache.cpp
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "video_core/control/channel_state_cache.inc"
+
+namespace VideoCommon {
+
+// Binds the three references to the 3D engine, the compute engine and the memory manager
+// owned by `channel_state`; all three pointers are dereferenced here.
+ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
+    : maxwell3d{*channel_state.maxwell_3d},
+      kepler_compute{*channel_state.kepler_compute},
+      gpu_memory{*channel_state.memory_manager} {}
+
+template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;
+
+} // namespace VideoCommon
--- a/src/video_core/control/channel_state_cache.h
+++ b/src/video_core/control/channel_state_cache.h
@@ -0,0 +1,101 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <deque>
+#include <limits>
+#include <mutex>
+#include <optional>
+#include <unordered_map>
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Tegra {
+
+namespace Engines {
+class Maxwell3D;
+class KeplerCompute;
+} // namespace Engines
+
+class MemoryManager;
+
+namespace Control {
+struct ChannelState;
+}
+
+} // namespace Tegra
+
+namespace VideoCommon {
+
+/// Non-owning view of the per-channel objects a cache works with: the 3D engine, the compute
+/// engine and the GPU memory manager of one channel.
+class ChannelInfo {
+public:
+    ChannelInfo() = delete;
+    /// Takes the references from the engines and memory manager held by `state`.
+    explicit ChannelInfo(Tegra::Control::ChannelState& state);
+    ChannelInfo(const ChannelInfo& state) = delete;
+    ChannelInfo& operator=(const ChannelInfo&) = delete;
+    ChannelInfo(ChannelInfo&& other) = default;
+    // NOTE(review): because the class has reference members, this defaulted move assignment
+    // is defined as deleted by the language; confirm nothing relies on assigning ChannelInfo.
+    ChannelInfo& operator=(ChannelInfo&& other) = default;
+
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::Engines::KeplerCompute& kepler_compute;
+    Tegra::MemoryManager& gpu_memory;
+};
+
+/// Base for caches that keep one state object of type P per GPU channel and remember which
+/// channel and which address space are currently bound.
+template <class P>
+class ChannelSetupCaches {
+public:
+    /// Operations for setting the channel of execution.
+    virtual ~ChannelSetupCaches();
+
+    /// Create channel state.
+    virtual void CreateChannel(Tegra::Control::ChannelState& channel);
+
+    /// Bind a channel for execution.
+    void BindToChannel(s32 id);
+
+    /// Erase channel's state.
+    void EraseChannel(s32 id);
+
+    /// Returns the memory manager registered for address space `id`, or nullptr when no
+    /// address space with that id has been registered.
+    Tegra::MemoryManager* GetFromID(size_t id) const {
+        std::unique_lock<std::mutex> lk(config_mutex);
+        const auto ref = address_spaces.find(id);
+        if (ref == address_spaces.end()) {
+            // Dereferencing end() is undefined behaviour; report "not found" instead.
+            return nullptr;
+        }
+        return ref->second.gpu_memory;
+    }
+
+    /// Returns the storage index assigned to address space `id`, or std::nullopt when no
+    /// address space with that id has been registered.
+    std::optional<size_t> getStorageID(size_t id) const {
+        std::unique_lock<std::mutex> lk(config_mutex);
+        const auto ref = address_spaces.find(id);
+        if (ref == address_spaces.end()) {
+            return std::nullopt;
+        }
+        return ref->second.storage_id;
+    }
+
+protected:
+    /// Value of current_channel_id while no channel is bound.
+    static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
+
+    // The pointers below are null until BindToChannel() selects a channel, and are reset to
+    // null again when the bound channel is erased.
+    P* channel_state{};
+    size_t current_channel_id{UNSET_CHANNEL};
+    size_t current_address_space{};
+    Tegra::Engines::Maxwell3D* maxwell3d{};
+    Tegra::Engines::KeplerCompute* kepler_compute{};
+    Tegra::MemoryManager* gpu_memory{};
+
+    /// Per-channel state objects, indexed by the slot ids stored in channel_map.
+    std::deque<P> channel_storage;
+    /// Slots of erased channels that CreateChannel() may reuse.
+    std::deque<size_t> free_channel_ids;
+    /// Maps a channel's bind id to its slot in channel_storage.
+    std::unordered_map<s32, size_t> channel_map;
+    /// Slots of all channels that currently exist.
+    std::vector<size_t> active_channel_ids;
+    /// Bookkeeping for one GPU address space shared by one or more channels.
+    struct AddresSpaceRef {
+        size_t ref_count;
+        size_t storage_id;
+        Tegra::MemoryManager* gpu_memory;
+    };
+    /// Registered address spaces, keyed by the memory manager's id.
+    std::unordered_map<size_t, AddresSpaceRef> address_spaces;
+    /// Guards the channel and address-space bookkeeping above.
+    mutable std::mutex config_mutex;
+
+    /// Hook invoked when a new address space is registered; the default does nothing.
+    virtual void OnGPUASRegister([[maybe_unused]] size_t map_id) {}
+};
+
+} // namespace VideoCommon
--- a/src/video_core/control/channel_state_cache.inc
+++ b/src/video_core/control/channel_state_cache.inc
@@ -0,0 +1,86 @@
+// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <algorithm>
+
+#include "video_core/control/channel_state.h"
+#include "video_core/control/channel_state_cache.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+
+namespace VideoCommon {
+
+/// Out-of-line definition of the destructor; it is defaulted.
+template <class P>
+ChannelSetupCaches<P>::~ChannelSetupCaches() = default;
+
+/// Registers `channel`: obtains a storage slot for its state (recycling the slot of an erased
+/// channel when one is available), maps the channel's bind id to that slot and
+/// reference-counts the address space of the channel's memory manager. OnGPUASRegister() is
+/// invoked the first time an address space is seen.
+template <class P>
+void ChannelSetupCaches<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
+    std::unique_lock<std::mutex> lk(config_mutex);
+    ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
+    auto new_id = [this, &channel]() {
+        if (!free_channel_ids.empty()) {
+            auto id = free_channel_ids.front();
+            free_channel_ids.pop_front();
+            // The recycled slot still holds the state object of the erased channel
+            // (EraseChannel only returns the id to the free list). End its lifetime before
+            // constructing the new state in place, otherwise whatever P owns is leaked.
+            channel_storage[id].~P();
+            new (&channel_storage[id]) P(channel);
+            return id;
+        }
+        channel_storage.emplace_back(channel);
+        return channel_storage.size() - 1;
+    }();
+    channel_map.emplace(channel.bind_id, new_id);
+    if (current_channel_id != UNSET_CHANNEL) {
+        // Re-derive the pointer to the bound channel's state after touching the storage.
+        channel_state = &channel_storage[current_channel_id];
+    }
+    active_channel_ids.push_back(new_id);
+    auto as_it = address_spaces.find(channel.memory_manager->GetID());
+    if (as_it != address_spaces.end()) {
+        // Address space already known: just count the additional user.
+        as_it->second.ref_count++;
+        return;
+    }
+    AddresSpaceRef new_gpu_mem_ref{
+        .ref_count = 1,
+        .storage_id = address_spaces.size(),
+        .gpu_memory = channel.memory_manager.get(),
+    };
+    address_spaces.emplace(channel.memory_manager->GetID(), new_gpu_mem_ref);
+    OnGPUASRegister(channel.memory_manager->GetID());
+}
+
+/// Makes the channel registered under bind id `id` the current one and refreshes the cached
+/// engine, memory-manager and address-space values from its state.
+template <class P>
+void ChannelSetupCaches<P>::BindToChannel(s32 id) {
+    std::unique_lock<std::mutex> lk(config_mutex);
+    const auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    current_channel_id = it->second;
+
+    P& bound_state = channel_storage[current_channel_id];
+    channel_state = &bound_state;
+    maxwell3d = &bound_state.maxwell3d;
+    kepler_compute = &bound_state.kepler_compute;
+    gpu_memory = &bound_state.gpu_memory;
+    current_address_space = gpu_memory->GetID();
+}
+
+/// Unregisters the channel bound under `id`: its storage slot goes back to the free list and
+/// it is dropped from the bind-id map and from the list of active channels.
+template <class P>
+void ChannelSetupCaches<P>::EraseChannel(s32 id) {
+    std::unique_lock<std::mutex> lk(config_mutex);
+    const auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    // Copy the slot id out before the map entry (and with it the iterator) goes away.
+    const auto this_id = it->second;
+    channel_map.erase(it);
+    free_channel_ids.push_back(this_id);
+
+    const auto active_pos =
+        std::find(active_channel_ids.begin(), active_channel_ids.end(), this_id);
+    active_channel_ids.erase(active_pos);
+
+    if (this_id != current_channel_id) {
+        if (current_channel_id != UNSET_CHANNEL) {
+            // Another channel stays bound; re-derive the pointer to its state.
+            channel_state = &channel_storage[current_channel_id];
+        }
+        return;
+    }
+    // The erased channel was the bound one: nothing is bound any more.
+    current_channel_id = UNSET_CHANNEL;
+    channel_state = nullptr;
+    maxwell3d = nullptr;
+    kepler_compute = nullptr;
+    gpu_memory = nullptr;
+}
+
+} // namespace VideoCommon
--- a/src/video_core/control/scheduler.cpp
+++ b/src/video_core/control/scheduler.cpp
@@ -0,0 +1,32 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <memory>
+
+#include "common/assert.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
+#include "video_core/gpu.h"
+
+namespace Tegra::Control {
+/// Keeps a reference to the GPU that channels are bound on in Push().
+Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}
+
+/// Defaulted destructor, defined out of line.
+Scheduler::~Scheduler() = default;
+
+/// Binds the declared channel `channel` on the GPU, queues `entries` on that channel's DMA
+/// pusher and dispatches them. Everything runs while scheduling_guard is held.
+void Scheduler::Push(s32 channel, CommandList&& entries) {
+    std::unique_lock lk(scheduling_guard);
+    auto it = channels.find(channel);
+    ASSERT(it != channels.end());
+    auto channel_state = it->second;
+    gpu.BindChannel(channel_state->bind_id);
+
+    auto& pusher = *channel_state->dma_pusher;
+    pusher.Push(std::move(entries));
+    pusher.DispatchCalls();
+}
+
+/// Registers `new_channel` under its bind id so that Push() can target it later.
+/// If a channel with the same bind id is already registered, the existing entry is kept
+/// (unordered_map::emplace does not overwrite).
+void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
+    // Read the key first: the pointer is moved into the map below.
+    const s32 channel = new_channel->bind_id;
+    std::unique_lock lk(scheduling_guard);
+    // new_channel is a by-value sink parameter; moving it avoids an extra shared_ptr copy.
+    channels.emplace(channel, std::move(new_channel));
+}
+
+} // namespace Tegra::Control
--- a/src/video_core/control/scheduler.h
+++ b/src/video_core/control/scheduler.h
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+#include "video_core/dma_pusher.h"
+
+namespace Tegra {
+
+class GPU;
+
+namespace Control {
+
+struct ChannelState;
+
+/// Keeps a table of declared channels and forwards submitted command lists to the DMA pusher
+/// of the channel they target. Push and DeclareChannel both take scheduling_guard.
+class Scheduler {
+public:
+    /// Stores a reference to the GPU that is used to bind channels in Push.
+    explicit Scheduler(GPU& gpu_);
+    ~Scheduler();
+
+    /// Binds the channel registered under `channel` on the GPU, then pushes `entries` to its
+    /// DMA pusher and dispatches them.
+    void Push(s32 channel, CommandList&& entries);
+
+    /// Registers `new_channel` under its bind_id so Push can find it.
+    void DeclareChannel(std::shared_ptr<ChannelState> new_channel);
+
+private:
+    /// Declared channels, keyed by the bind_id they were declared with.
+    std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
+    /// Held for the whole of Push and while inserting in DeclareChannel.
+    std::mutex scheduling_guard;
+    /// GPU on which channels are bound before their commands are dispatched.
+    GPU& gpu;
+};
+
+} // namespace Control
+
+} // namespace Tegra
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Liam	5b7c0f13d3	fsp_srv: stub GetCacheStorageSize	2022-10-08 12:24:00 -04:00
Mai	155213484b	Merge pull request #9016 from liamwhite/drunken-schedule vk_scheduler: wait for command processing to complete	2022-10-07 20:27:16 -04:00
Mai	b7ad83383f	Merge pull request #8932 from abouvier/cmake-pkgconfig cmake: Fix FindPkgConfig	2022-10-07 20:25:51 -04:00
Mai	6f101e0f02	Merge pull request #9030 from Morph1984/api-disable configure_graphics: Fix graphics API selection when a game is running	2022-10-07 20:25:23 -04:00
liamwhite	972b93bf00	Merge pull request #8807 from Docteh/default_fonts Qt: work around Qt5's font choice for Chinese (in Windows)	2022-10-07 17:39:39 -04:00
Morph	1e35ade1ec	configure_graphics: Fix graphics API selection when a game is running The graphics API setting should not be changed when a game is running.	2022-10-07 15:11:26 -04:00
Narr the Reg	b8777b6653	Merge pull request #9028 from liamwhite/wtype-limits nfp_types: silence -Wtype-limits	2022-10-07 09:03:35 -05:00
Liam	9574429c5f	nfp_types: silence -Wtype-limits	2022-10-07 06:52:28 -04:00
bunnei	61883d8820	Merge pull request #6142 from lat9nq/prog_meta_ref_bind_address program_metadata: Avoid reference binding to misaligned address	2022-10-06 20:42:15 -07:00
bunnei	bb86fc573f	Merge pull request #8944 from Tachi107/patch-2 build(room): simplify yuzu-room installation	2022-10-06 16:59:04 -07:00
Fernando S	1effa578f1	Merge pull request #8467 from FernandoS27/yfc-rel-1 Project yuzu Fried Chicken (Y.F.C.) Part 1	2022-10-06 21:29:53 +02:00
Byte	df6dffa30b	vulkan_blitter: Fix pool allocation double free.	2022-10-06 21:00:54 +02:00
Liam	aedd739631	maxwell_dma: remove warnings from implemented functionality	2022-10-06 21:00:54 +02:00
Fernando Sahmkow	ca3db0d7c9	General: address feedback	2022-10-06 21:00:54 +02:00
Liam	0d99b7962d	state_tracker: workaround channel setup for homebrew	2022-10-06 21:00:54 +02:00
Liam	c80ed6d81f	general: rework usages of UNREACHABLE macro	2022-10-06 21:00:54 +02:00
Morph	903705043d	nvdisp: End system frame after requesting to swap buffers Fixes frametime reporting	2022-10-06 21:00:54 +02:00
Morph	11e1cbbdbd	address_space: Rename va_start to virt_start Avoids conflicting with the va_start macro	2022-10-06 21:00:54 +02:00
Morph	fa342cae22	address_space: Address feedback	2022-10-06 21:00:54 +02:00
Morph	fedd983f96	general: Format licenses as per SPDX guidelines	2022-10-06 21:00:54 +02:00
Fernando Sahmkow	d97d409647	NvHostChannels: improve hack for supporting multiple channels.	2022-10-06 21:00:54 +02:00
Fernando Sahmkow	c2b7de66b3	Address Feedback from bylaws.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	8a372035db	Nvflinger: correct duplication.	2022-10-06 21:00:53 +02:00
VonChenPlus	9982cff98b	Core: Fix get nvmap object random crash	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	fe24c65153	General: Fix clang format.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	1a9b71b1c6	Common: Fix variable shadowing.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	cdce7f781b	Vulkan Swapchain: Overall improvements.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	8d774e7415	NvDec: Fix regressions.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	ada09778d9	Vulkan Texture Cache: Limit render area to the max width/height of the targets.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	8fd1d769fe	ImageBase: Basic fixes.	2022-10-06 21:00:53 +02:00
Liam White	afab6c143c	General: Fix compilation for GCC	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	fd7afda1e8	VideoCore: Implement formats needed for N64 emulation.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	770e19f51a	Buffer Cache: Deduce vertex array limit from memory layout when limit is the highest possible.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	8bb604b3be	VideoCore: Add option to dump the macros.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	a9ca39f859	NVDRV: Further improvements.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	b59ca4df0c	Buffer Cache: Basic fixes.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	98317f2b77	Decoders: Improve overall speed.	2022-10-06 21:00:53 +02:00
bunnei	f5fd6b5c86	DMA & InlineToMemory Engines Rework.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	b2099fbdcc	Maxwell3D: Add small_index_2	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	7cfa28a666	Memory Manager: ensure safety of GPU to CPU address.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	5a568b1655	MemoryManager: Fix errors popping out.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	3d02143476	Shader Decompiler: implement better tracking for Vulkan samplers.	2022-10-06 21:00:53 +02:00
Fernando Sahmkow	ba34cf0a69	Shader Decompiler: Check for shift when deriving composite samplers.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	a283eda320	Shader Decompiler: Fix dangerous behavior of invalid iterator insertion.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	359f22b808	MemoryManager: Finish up the initial implementation.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	5caa150e9a	OpenGL: Fix TickWork	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	bc8b3d225e	VideoCore: Refactor fencing system.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	4d60410dd9	MemoryManager: initial multi paging system implementation.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	98b5e236d4	Vulkan: Fix Scissor on Clears	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	920429fde7	NVDRV: Further refactors and eliminate old code.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	2931101e6f	NVDRV: Refactor Host1x	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	668e80a9f4	VideoCore: Refactor syncing.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	e44ac8b821	Texture Cache: Fix GC and GPU Modified on Joins.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	f350c3d74e	Texture cache: Fix the remaining issues with memory mnagement and unmapping.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	9cf4c8831d	Texture cache: Fix dangling references on multichannel.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	e462191482	Refactor VideoCore to use AS sepparate from Channel.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	bb74973bba	General: Rebase fixes.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	6fc4012396	VideoCore: Extra Fixes.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	feb49c822d	NVDRV: Remake ASGPU	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	c6ea0c650e	NVDRV: Update copyright notices.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	0f4ae3cc52	MemoryManager: Temporary Fix for NVDEC.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	835b950f7e	NvHostCtrl: Fix merge of nvflinger.	2022-10-06 21:00:52 +02:00
Fernando Sahmkow	cbaf3fb433	VideoCore: Update MemoryManager	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	b617874724	Common: implement MultiLevelPageTable.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	3f8e7a5585	VideoCore: Fix channels with disk pipeline/shader cache.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	d7990c159e	OpenGl: Implement Channels.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	2c62563ab5	NVHOST_CTRl: Implement missing method and fix some stuffs.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	139ea93512	VideoCore: implement channels on gpu caches.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	c77b8df12e	NVASGPU: Fix Remap.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	ad038609c8	NVDRV: Fix clearing when destroying.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	68d9504a04	NVMAP: Fix the Free return parameters.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	af35dbcf63	NVDRV: Fix Open/Close and make sure each device is correctly created.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	de0e8eff42	NVDRV: Implement new NvMap	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	3cbe352c18	NVDRV: Refactor and add new NvMap.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	a21b8824fb	NVDRV: Cleanup.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	d30b885d71	NVDRV: Implement QueryEvent.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	39a5ce4e69	NvHost: Remake Ctrl Implementation.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	ac104a24d1	NvHost: Try a different approach to blocking.	2022-10-06 21:00:51 +02:00
Fernando Sahmkow	7b7f6f1cb7	NvHost: Fix some regressions and correct signaling on timeout.	2022-10-06 21:00:51 +02:00
Fernando S	31d4bc6953	Merge pull request #9025 from FernandoS27/slava-ukrayini Texture Cache: Add ASTC 10x5 Format.	2022-10-06 17:10:28 +02:00
Fernando Sahmkow	1a49991676	Texture Cache: Add ASTC 10x5 Format.	2022-10-06 16:45:40 +02:00
bunnei	d55096ce85	Merge pull request #9013 from liamwhite/spinning-a-yarn common: remove "yuzu:" prefix from thread names	2022-10-05 18:53:42 -07:00
bunnei	1689530f52	Merge pull request #9015 from german77/amiibo-rewrite service: nfp: Fix errors to pass unit testing	2022-10-05 14:13:57 -07:00
Kyle Kienapfel	3b5a937125	Show error from cpp-httplib when we don't have a response to read (report errors while connecting to API) (#8999 ) Co-authored-by: Kyle Kienapfel <Docteh@users.noreply.github.com>	2022-10-05 21:39:54 +02:00
Fernando S	71fe9fd0f2	Merge pull request #8987 from liamwhite/another-name-for-reinforcement-steel vulkan: automatically use larger staging buffer sizes when possible	2022-10-05 08:54:22 +02:00
Fernando S	4774e32593	Merge pull request #9011 from liamwhite/frog-emoji-moment shader_recompiler: add extended LDC to GLASM backend	2022-10-05 08:53:26 +02:00
bunnei	fc0ace6048	Merge pull request #9005 from liamwhite/micro-fit macro_jit_x64: cancel exit for taken branch	2022-10-04 20:08:02 -07:00
bunnei	92c0ad23eb	Merge pull request #9010 from liamwhite/buttwise macro_jit_x64: fix miscompilation of bit extraction operations	2022-10-04 15:52:39 -07:00
Narr the Reg	e85c19adcb	service: nfp: Fix errors to pass unit testing	2022-10-03 18:06:55 -05:00
Liam	35d3e7db2a	common: remove "yuzu:" prefix from thread names	2022-10-03 18:43:56 -04:00
Liam	ae7062d522	shader_recompiler: add extended LDC to GLASM backend	2022-10-02 17:32:54 -04:00
Liam	1225627515	macro_jit_x64: fix miscompilation of bit extraction operations	2022-10-01 20:31:21 -04:00
Kyle Kienapfel	1dba5fab62	Qt: work around Qt5's font choice for Chinese On Windows there are currently two fonts used. The first, does the Menu, QTreeView and Tooltips Second is Everything else which is a default font. From inspecting QApplication::font() at runtime Windows 10 English: QFont(MS Shell Dlg 2,8.25,-1,5,50,0,0,0,0,0) Windows 11 Japanese: MS UI Gothic,9 ,-1,5,50,0,0,0,0,0 Windows 11 Traditional Chinese: PMingLiU,9 ,-1,5,50,0,0,0,0,0 Windows 11 Simplified Chinese: SimSun,9 ,-1,5,50,0,0,0,0,0 Windows 11 Korean: Gulim,9 ,-1,5,50,0,0,0,0,0 I initially investigated dynamically changing the font when the UI language is English, but this was getting quite messy Qt6 makes changes to default font in some situations, so this PR is being narrowed in scope to only effect Chinese font choices. This change only effects rendering of Latin/Cyrillic characters.	2022-10-01 15:27:23 -07:00
Liam	b80f7faebe	macro_jit_x64: cancel exit for taken branch	2022-10-01 01:32:24 -04:00
Liam	087c6c2ef1	vulkan: automatically use larger staging buffer sizes when possible	2022-09-25 02:28:03 -04:00
Andrea Pappacoda	db88eaa346	build(room): simplify yuzu-room installation CMake is able to automatically install binaries in the correct location. Also see my older patch, `af94bf4a59` Cc: @FearlessTobi	2022-09-22 21:51:56 +02:00
Alexandre Bouvier	09a87966e0	cmake: Fix FindPkgConfig	2022-09-20 22:21:52 +02:00
lat9nq	bfb7cbc292	program_metadata: Unpack FileAccessHeader and FileAccessControl Avoids a reference binding to a misaligned addresses. Unpacking one requires unpacking the other, otherwise there'll be a misaligned address on the leftover one.	2022-02-13 02:20:56 -05:00