Compare commits


52 Commits

Author SHA1 Message Date
Chloe Marcec
fc4d692c50 Addressed issues 2020-11-10 15:57:36 +11:00
Chloe Marcec
31c12de0fe core: Make nvservices more standardized 2020-11-10 15:57:35 +11:00
Rodrigo Locatti
9b24197ca0 Merge pull request #4909 from lioncash/interrupt
cpu_interrupt_handler: Mark move constructor/assignment as deleted
2020-11-08 22:09:40 -03:00
Rodrigo Locatti
8008b5ddc9 Merge pull request #4910 from lioncash/service
ipc_helpers: Remove usage of the global system instance
2020-11-08 19:11:31 -03:00
Lioncash
da7be67daf ipc_helpers: Remove usage of the global system instance
Resolves numerous deprecation warnings throughout the codebase caused by
the inclusion of this header. Building core should now be significantly
less noisy (and rely on less global state).

This also uncovered quite a few modules that were relying on indirect
includes, which have also been fixed.
2020-11-08 15:58:11 -05:00
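For illustration, a minimal self-contained sketch of the dependency-injection shape this commit describes; the class and member names below are assumed for the example and are not the actual yuzu types.

#include <string>

// Hypothetical stand-in for Kernel::KernelCore.
struct KernelCore {
    std::string name = "kernel";
};

// Before: a helper would reach for a global, e.g. something like
// Core::System::GetInstance().Kernel(), forcing every includer of the
// header to pull in core.h and depend on global state.
//
// After: the dependency is passed in once and stored as a reference.
class ResponseBuilderSketch {
public:
    explicit ResponseBuilderSketch(KernelCore& kernel_) : kernel{kernel_} {}

    const std::string& KernelName() const {
        return kernel.name;
    }

private:
    KernelCore& kernel; // injected, no global lookup
};

int main() {
    KernelCore kernel;
    ResponseBuilderSketch rb{kernel};
    return rb.KernelName().empty() ? 1 : 0;
}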
Lioncash
0aad914527 cpu_interrupt_handler: Mark move constructor/assignment as deleted
The interrupt handler contains a std::atomic_bool, which isn't copyable
or movable, so the special move member functions will always be deleted,
despite being defaulted.

This can resolve warnings on clang and GCC.
2020-11-08 15:37:04 -05:00
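As a small self-contained illustration of the behaviour described above (illustrative class name, not the real one): because std::atomic<bool> is neither copyable nor movable, a defaulted move constructor in such a class is implicitly deleted anyway; spelling that out with = delete documents the intent and avoids the clang/GCC warning.

#include <atomic>

class InterruptHandlerSketch {
public:
    InterruptHandlerSketch() = default;

    InterruptHandlerSketch(const InterruptHandlerSketch&) = delete;
    InterruptHandlerSketch& operator=(const InterruptHandlerSketch&) = delete;

    // A "= default" here would silently end up deleted because of the
    // std::atomic member; "= delete" states the same outcome explicitly.
    InterruptHandlerSketch(InterruptHandlerSketch&&) = delete;
    InterruptHandlerSketch& operator=(InterruptHandlerSketch&&) = delete;

private:
    std::atomic<bool> is_interrupted{false};
};

int main() {
    [[maybe_unused]] InterruptHandlerSketch handler;
    return 0;
}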
bunnei
7bf9f9ae49 Merge pull request #4903 from bunnei/remove-gpu-integrity
video_core: dma_pusher: Remove integrity check on command lists.
2020-11-08 02:48:22 -08:00
Chloe
9f5facc3aa Merge pull request #4908 from lioncash/fmt
externals: Update fmt to 7.1.2
2020-11-08 20:26:03 +11:00
Lioncash
0785796372 externals: Update fmt to 7.1.2
Updates to the latest bugfix release of fmt.
2020-11-08 03:44:07 -05:00
LC
e829973742 Merge pull request #4906 from lat9nq/log-cpu-accuracy
settings: log value of CPU_Accuracy
2020-11-07 17:01:33 -05:00
lat9nq
1e149dc18b settings: log value of CPU_Accuracy 2020-11-07 16:14:10 -05:00
bunnei
dc5396a466 video_core: dma_pusher: Remove integrity check on command lists.
- This seems to cause softlocks in Breath of the Wild.
2020-11-07 00:08:19 -08:00
bunnei
af477fb8c5 Merge pull request #4888 from lioncash/unicorn-remove
core: Remove usage of unicorn
2020-11-06 22:39:05 -08:00
bunnei
f6a89edb67 Merge pull request #4899 from lioncash/fiberimpl
common/fiber: Move all member variables into impl class
2020-11-06 20:01:03 -08:00
Lioncash
00fb79b2f3 common/fiber: Move all member variables into impl class
Hides all of the implementation details for users of the class. This has
the benefit of reducing includes and also making the fiber classes
movable again.
2020-11-06 20:36:32 -05:00
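Roughly, the pattern being applied here is the pimpl idiom; a compact sketch with placeholder members (not the real fiber internals) showing how moving all state behind a unique_ptr keeps the header light and makes the outer class movable again.

#include <memory>
#include <utility>

// Public surface: only a forward declaration of Impl is visible.
class FiberSketch {
public:
    FiberSketch();
    ~FiberSketch();

    FiberSketch(FiberSketch&&) noexcept;            // movable again, since
    FiberSketch& operator=(FiberSketch&&) noexcept; // only a unique_ptr moves

private:
    struct Impl;                // defined out of line
    std::unique_ptr<Impl> impl; // sole data member
};

// Implementation details stay out of the header entirely.
struct FiberSketch::Impl {
    void* start_parameter{};
    bool is_thread_fiber{};
};

FiberSketch::FiberSketch() : impl{std::make_unique<Impl>()} {}
FiberSketch::~FiberSketch() = default;
FiberSketch::FiberSketch(FiberSketch&&) noexcept = default;
FiberSketch& FiberSketch::operator=(FiberSketch&&) noexcept = default;

int main() {
    FiberSketch a;
    [[maybe_unused]] FiberSketch b = std::move(a);
    return 0;
}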
bunnei
91a45834fd Merge pull request #4891 from lioncash/clang2
General: Fix clang build
2020-11-06 10:33:13 -08:00
bunnei
0b75ec5316 Merge pull request #4894 from lioncash/fn
settings: Simplify initializer of resolution factor
2020-11-06 09:54:02 -08:00
Lioncash
c0ab5b79dc settings: Simplify initializer of resolution factor
This can use a braced initializer to accomplish the same thing with less
code.
2020-11-05 22:07:10 -05:00
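The kind of simplification meant here, shown on a made-up setting rather than the actual resolution-factor code: a braced initializer lets the lambda convert directly into the std::function member without an extra wrapper expression.

#include <functional>

struct SettingSketch {
    std::function<float()> getter;
};

// Verbose: the callable is wrapped in an explicit std::function first.
SettingSketch verbose{std::function<float()>([] { return 1.0f; })};

// Braced initializer: the lambda converts in place, same behaviour, less code.
SettingSketch concise{[] { return 1.0f; }};

int main() {
    return concise.getter() == verbose.getter() ? 0 : 1;
}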
bunnei
a111a9ae2c Merge pull request #4854 from ReinUsesLisp/cube-array-shadow
shader: Partially implement texture cube array shadow
2020-11-05 16:25:00 -08:00
Lioncash
6f006d051e General: Fix clang build
Allows the build to work on clang again
2020-11-05 10:07:16 -05:00
bunnei
d62d28522b Merge pull request #4889 from lioncash/setting-global
core/settings: Move configuring_global behind an API
2020-11-04 17:09:19 -08:00
bunnei
087f52e872 Merge pull request #4858 from lioncash/initializer
General: Resolve a few missing initializer warnings
2020-11-04 12:10:10 -08:00
Lioncash
7aae6d6d2b core/settings: Move configuring_global behind an API
Rather than have directly modified global state here, we can make it an
implementation detail and have an interface that changes are queried
through.
2020-11-04 04:16:37 -05:00
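In effect, the flag becomes an implementation detail behind query/update functions; a sketch of that shape (function and namespace names assumed for the example):

#include <cstdio>

namespace SettingsSketch {
namespace {
// Internal linkage: no other translation unit can touch this directly.
bool configuring_global = true;
} // namespace

bool IsConfiguringGlobal() {
    return configuring_global;
}

void SetConfiguringGlobal(bool is_global) {
    configuring_global = is_global;
}
} // namespace SettingsSketch

int main() {
    SettingsSketch::SetConfiguringGlobal(false);
    std::printf("configuring global: %d\n", SettingsSketch::IsConfiguringGlobal());
    return 0;
}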
Chloe
6bbbbe8f85 Merge pull request #4869 from bunnei/improve-gpu-sync
Improvements to GPU synchronization & various refactoring
2020-11-04 18:36:55 +11:00
Lioncash
fc6db97a09 core: Remove usage of unicorn
Unicorn long since lost most of its use, as dynarmic gained support for
handling most instructions. At this point, any further issues encountered
should be used to make dynarmic better.

This also allows us to remove our dependency on Python.
2020-11-03 20:22:05 -05:00
bunnei
4bfa411ddc Merge pull request #4874 from lioncash/nodiscard2
nvdec: Make use of [[nodiscard]] where applicable
2020-11-03 16:34:07 -08:00
bunnei
46fdc94586 Merge pull request #4887 from lioncash/common-build
microprofile: Silence warning in headers
2020-11-03 13:41:29 -08:00
Lioncash
ee21b5378b microprofile: Silence warning in headers
Silences a truncation warning by making the truncation explicit and
documenting the reason for it.
2020-11-03 15:07:13 -05:00
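The general technique, in a generic sketch rather than the actual microprofile line: where an implicit narrowing conversion would trigger a warning (MSVC C4244 and friends), the truncation is written out with a cast plus a comment stating why losing the high bits is acceptable.

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint64_t ticks_per_second = 10'000'000;

    // Assigning a 64-bit tick count into a 32-bit variable implicitly would
    // warn; making the truncation explicit (and documenting that the value
    // fits) silences the warning deliberately rather than accidentally.
    const auto truncated = static_cast<std::uint32_t>(ticks_per_second);

    std::printf("%u\n", truncated);
    return 0;
}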
bunnei
222fe75401 Merge pull request #4873 from lioncash/common-error
common: Enable warnings as errors
2020-11-03 11:00:23 -08:00
bunnei
448e4d5c2a Merge pull request #4878 from bunnei/unload-nrr
hle: service: ldr: Implement UnloadNrr.
2020-11-03 08:52:40 -08:00
Lioncash
4a4b685a04 common: Enable warnings as errors
Cleans up common so that we can enable warnings as errors.
2020-11-02 15:50:58 -05:00
Lioncash
4f0f481f63 nvdec: Make use of [[nodiscard]] where applicable
Prevents bugs from occurring where the results of a function are
accidentally discarded
2020-11-02 02:45:15 -05:00
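As a reminder of what the attribute buys, an illustrative example (the enum and function here are invented for the sketch): discarding the return value of a [[nodiscard]] function becomes a compiler warning instead of a silent bug.

#include <cstdio>

enum class NvResultSketch { Success, BadParameter };

[[nodiscard]] NvResultSketch DoIoctlSketch(int command) {
    return command >= 0 ? NvResultSketch::Success : NvResultSketch::BadParameter;
}

int main() {
    // DoIoctlSketch(42);                  // would warn: result discarded
    const auto result = DoIoctlSketch(42); // result is checked instead
    return result == NvResultSketch::Success ? 0 : 1;
}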
bunnei
1089d76736 Merge pull request #4865 from ameerj/async-threadcount
async_shaders: Increase Async worker thread count for >8 thread cpus
2020-11-01 01:54:01 -07:00
bunnei
848bdf8a40 fixup! hle service: nvdrv: nvhost_gpu: Update to use SyncpointManager and other improvements. 2020-11-01 01:52:38 -07:00
bunnei
7d2839d7a3 core: Initialize GPU before services. 2020-11-01 01:52:38 -07:00
bunnei
e67b8678f8 hle service: nvdrv: nvhost_gpu: Update to use SyncpointManager and other improvements.
- Refactor so that SubmitGPFIFO and KickoffPB use shared functionality.
- Implement add_wait and add_increment flags.
2020-11-01 01:52:38 -07:00
bunnei
c6e1c46ac7 video_core: dma_pusher: Add support for integrity checks.
- Log corrupted command lists, rather than crash.
2020-11-01 01:52:38 -07:00
bunnei
c64545d07a video_core: dma_pusher: Add support for prefetched command lists. 2020-11-01 01:52:38 -07:00
bunnei
1d4cbb92f2 service: hle: nvflinger: Fix potential shutdown crash when GPU is destroyed. 2020-11-01 01:52:38 -07:00
bunnei
6053b95552 video_core: gpu: Implement WaitFence and IncrementSyncPoint. 2020-11-01 01:52:37 -07:00
bunnei
66edfd61c6 hle service: nvdrv: nvhost_ctrl: Update to use SyncpointManager. 2020-11-01 01:52:37 -07:00
bunnei
4a3fd97e48 hle service: nvdrv: Update to instantiate SyncpointManager. 2020-11-01 01:52:34 -07:00
bunnei
d567b7e841 hle: service: nvdrv: Implement SyncpointManager, to manage syncpoints. 2020-11-01 01:51:54 -07:00
Levi Behunin
bca9591660 Rename to align with switchbrew and remove gpu function (#4714)
* Rename to align with switchbrew

* Rename to align with switchbrew and remove gpu function that checks if clearing should be done.
2020-11-01 01:24:17 -07:00
bunnei
98f68d06f1 Merge pull request #4853 from ReinUsesLisp/fcmp-imm
shader/arithmetic: Implement FCMP immediate + register variant
2020-10-31 01:25:02 -07:00
bunnei
a0e5cccb92 hle: service: ldr: Implement UnloadNrr.
- Used by Final Fantasy X/X-2 HD Remaster.
2020-10-31 01:22:53 -07:00
LC
6db0c0d8d9 Merge pull request #4872 from jbeich/clang
video_core: unbreak -Werror in NVDEC with Clang
2020-10-30 15:11:40 -04:00
Lioncash
14a97d082e CMakeLists: Resolve MSVC build failures
Prevents the compiler from tripping up on Windows headers.
2020-10-30 14:57:58 -04:00
Lioncash
5553bd3ba2 General: Resolve a few missing initializer warnings
Resolves a few -Wmissing-initializer warnings.
2020-10-29 19:37:07 -04:00
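Warnings of this kind are usually resolved by initializing every member explicitly, as in this small illustration (the struct is modelled loosely on the backtrace entry touched further down in the diff, but the exact sites differ):

struct BacktraceEntrySketch {
    const char* module;
    unsigned long long address;
    unsigned long long original_address;
    unsigned long long offset;
};

int main() {
    // BacktraceEntrySketch partial{"", 0}; // would warn: fields left implicit

    // Every field spelled out keeps -Wmissing-field-initializers quiet and
    // makes the intended values obvious at the call site.
    BacktraceEntrySketch entry{"", 0, 0, 0};
    return entry.offset == 0 ? 0 : 1;
}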
ameerj
3620206136 async_shaders: Increase Async worker thread count for 8+ thread cpus
Adds 1 async worker thread for every 2 available threads above 8
2020-10-29 14:16:45 -04:00
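The stated rule, one extra worker for every two available threads above eight, can be sketched as follows; the baseline pool size below is an assumption for illustration, not taken from the commit.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <thread>

std::size_t WorkerCountSketch(std::size_t hardware_threads) {
    constexpr std::size_t base_workers = 2; // assumed baseline, illustrative only
    if (hardware_threads <= 8) {
        return base_workers;
    }
    // +1 worker per 2 hardware threads beyond 8, per the commit description.
    return base_workers + (hardware_threads - 8) / 2;
}

int main() {
    const std::size_t threads =
        std::max<std::size_t>(std::thread::hardware_concurrency(), 1);
    std::printf("threads=%zu workers=%zu\n", threads, WorkerCountSketch(threads));
    return 0;
}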
ReinUsesLisp
657771bdcb shader: Partially implement texture cube array shadow
This implements texture cube arrays with shadow comparisons but doesn't
fix the asserts related to it.

Fixes out of bounds reads on swizzle constructors and makes them use
bounds checked ::at instead of the unsafe operator[].
2020-10-28 17:12:40 -03:00
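The second half of this change is simply the difference between unchecked and checked element access, illustrated generically:

#include <array>
#include <cstddef>
#include <cstdio>
#include <stdexcept>

int main() {
    const std::array<int, 4> swizzle{0, 1, 2, 3};
    const std::size_t index = 7; // deliberately out of range

    // swizzle[index] would be an out-of-bounds read (undefined behaviour);
    // swizzle.at(index) throws instead, turning the bug into a visible error.
    try {
        std::printf("%d\n", swizzle.at(index));
    } catch (const std::out_of_range&) {
        std::puts("out-of-range swizzle component rejected");
    }
    return 0;
}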
ReinUsesLisp
44b552be71 shader/arithmetic: Implement FCMP immediate + register variant
Trivially add the encoding for this.
2020-10-28 17:05:41 -03:00
107 changed files with 1995 additions and 1773 deletions

3 .gitmodules vendored
View File

@@ -7,9 +7,6 @@
[submodule "dynarmic"]
path = externals/dynarmic
url = https://github.com/MerryMage/dynarmic.git
[submodule "unicorn"]
path = externals/unicorn
url = https://github.com/yuzu-emu/unicorn
[submodule "soundtouch"]
path = externals/soundtouch
url = https://github.com/citra-emu/ext-soundtouch.git

View File

@@ -4,16 +4,8 @@ cd /yuzu
# override Travis CI unreasonable ccache size
echo 'max_size = 3.0G' > "$HOME/.ccache/ccache.conf"
# Dirty hack to trick unicorn makefile into believing we are in a MINGW system
mv /bin/uname /bin/uname1 && echo -e '#!/bin/sh\necho MINGW64' >> /bin/uname
chmod +x /bin/uname
# Dirty hack to trick unicorn makefile into believing we have cmd
echo '' >> /bin/cmd
chmod +x /bin/cmd
mkdir build && cd build
cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
ninja
# Clean up the dirty hacks

View File

@@ -3,7 +3,7 @@
cd /yuzu
mkdir build && cd build
cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
cmake .. -G Ninja -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
ninja
ccache -s

View File

@@ -4,13 +4,12 @@ set -o pipefail
export MACOSX_DEPLOYMENT_TARGET=10.14
export Qt5_DIR=$(brew --prefix)/opt/qt5
export UNICORNDIR=$(pwd)/externals/unicorn
export PATH="/usr/local/opt/ccache/libexec:$PATH"
# TODO: Build using ninja instead of make
mkdir build && cd build
cmake --version
cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DUSE_DISCORD_PRESENCE=ON
cmake .. -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DUSE_DISCORD_PRESENCE=ON
make -j4
ccache -s

View File

@@ -18,8 +18,6 @@ CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" ON "EN
option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
option(YUZU_USE_BUNDLED_UNICORN "Build/Download bundled Unicorn" ON)
option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OFF)
option(YUZU_ENABLE_BOXCAT "Enable the Boxcat service, a yuzu high-level implementation of BCAT" ON)
@@ -161,7 +159,7 @@ macro(yuzu_find_packages)
# Cmake Pkg Prefix Version Conan Pkg
"Boost 1.73 boost/1.73.0"
"Catch2 2.13 catch2/2.13.0"
"fmt 7.1 fmt/7.1.0"
"fmt 7.1 fmt/7.1.2"
# can't use until https://github.com/bincrafters/community/issues/1173
#"libzip 1.5 libzip/1.5.2@bincrafters/stable"
"lz4 1.8 lz4/1.9.2"
@@ -372,81 +370,6 @@ endif()
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
# If unicorn isn't found, msvc -> download bundled unicorn; everyone else -> build external
if (YUZU_USE_BUNDLED_UNICORN)
if (MSVC)
message(STATUS "unicorn not found, falling back to bundled")
# Detect toolchain and platform
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
set(UNICORN_VER "unicorn-yuzu")
else()
message(FATAL_ERROR "No bundled Unicorn binaries for your toolchain. Disable YUZU_USE_BUNDLED_UNICORN and provide your own.")
endif()
if (DEFINED UNICORN_VER)
download_bundled_external("unicorn/" ${UNICORN_VER} UNICORN_PREFIX)
endif()
if (DEFINED UNICORN_VER)
download_bundled_external("unicorn/" ${UNICORN_VER} UNICORN_PREFIX)
endif()
set(UNICORN_FOUND YES)
set(LIBUNICORN_INCLUDE_DIR "${UNICORN_PREFIX}/include" CACHE PATH "Path to Unicorn headers" FORCE)
set(LIBUNICORN_LIBRARY "${UNICORN_PREFIX}/lib/x64/unicorn_dynload.lib" CACHE PATH "Path to Unicorn library" FORCE)
set(UNICORN_DLL_DIR "${UNICORN_PREFIX}/lib/x64/" CACHE PATH "Path to unicorn.dll" FORCE)
else()
message(STATUS "unicorn not found, falling back to externals")
if (MINGW)
set(UNICORN_LIB_NAME "unicorn.a")
else()
set(UNICORN_LIB_NAME "libunicorn.a")
endif()
set(UNICORN_FOUND YES)
set(UNICORN_PREFIX ${PROJECT_SOURCE_DIR}/externals/unicorn)
set(LIBUNICORN_LIBRARY "${UNICORN_PREFIX}/${UNICORN_LIB_NAME}" CACHE PATH "Path to Unicorn library" FORCE)
set(LIBUNICORN_INCLUDE_DIR "${UNICORN_PREFIX}/include" CACHE PATH "Path to Unicorn headers" FORCE)
set(UNICORN_DLL_DIR "${UNICORN_PREFIX}/" CACHE PATH "Path to unicorn dynamic library" FORCE)
find_package(PythonInterp 2.7 REQUIRED)
if (MINGW)
# Intentionally call the unicorn makefile directly instead of using make.sh so that we can override the
# UNAME_S makefile variable to MINGW. This way we don't have to hack at the uname binary to build
# Additionally, overriding DO_WINDOWS_EXPORT prevents unicorn from patching the static unicorn.a by using msvc and cmd,
# which are both things we don't have in a mingw cross compiling environment.
add_custom_command(OUTPUT ${LIBUNICORN_LIBRARY}
COMMAND ${CMAKE_COMMAND} -E env UNICORN_ARCHS="aarch64" PYTHON="${PYTHON_EXECUTABLE}" CC=x86_64-w64-mingw32-gcc AR=x86_64-w64-mingw32-gcc-ar RANLIB=x86_64-w64-mingw32-gcc-ranlib make UNAME_S=MINGW DO_WINDOWS_EXPORT=0
WORKING_DIRECTORY ${UNICORN_PREFIX}
)
else()
add_custom_command(OUTPUT ${LIBUNICORN_LIBRARY}
COMMAND ${CMAKE_COMMAND} -E env UNICORN_ARCHS="aarch64" PYTHON="${PYTHON_EXECUTABLE}" /bin/sh make.sh macos-universal-no
WORKING_DIRECTORY ${UNICORN_PREFIX}
)
endif()
# ALL makes this custom target build every time
# but it won't actually build if LIBUNICORN_LIBRARY is up to date
add_custom_target(unicorn-build ALL
DEPENDS ${LIBUNICORN_LIBRARY}
)
unset(UNICORN_LIB_NAME)
endif()
else()
find_package(Unicorn REQUIRED)
endif()
if (UNICORN_FOUND)
add_library(unicorn INTERFACE)
add_dependencies(unicorn unicorn-build)
target_link_libraries(unicorn INTERFACE "${LIBUNICORN_LIBRARY}")
target_include_directories(unicorn INTERFACE "${LIBUNICORN_INCLUDE_DIR}")
else()
message(FATAL_ERROR "Could not find or build unicorn which is required.")
endif()
# Platform-specific library requirements
# ======================================

View File

@@ -1,9 +0,0 @@
function(copy_yuzu_unicorn_deps target_dir)
include(WindowsCopyFiles)
set(DLL_DEST "${CMAKE_BINARY_DIR}/bin/$<CONFIG>/")
windows_copy_files(${target_dir} ${UNICORN_DLL_DIR} ${DLL_DEST}
libgcc_s_seh-1.dll
libwinpthread-1.dll
unicorn.dll
)
endfunction(copy_yuzu_unicorn_deps)

View File

@@ -902,8 +902,10 @@ inline uint16_t MicroProfileGetGroupIndex(MicroProfileToken t)
#include <windows.h>
#define snprintf _snprintf
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4244)
#endif
int64_t MicroProfileTicksPerSecondCpu()
{
static int64_t nTicksPerSecond = 0;
@@ -946,7 +948,11 @@ typedef HANDLE MicroProfileThread;
DWORD _stdcall ThreadTrampoline(void* pFunc)
{
MicroProfileThreadFunc F = (MicroProfileThreadFunc)pFunc;
return (uint32_t)F(0);
// The return value of F will always return a void*, however, this is for
// compatibility with pthreads. The underlying "address" of the pointer
// is always a 32-bit value, so this cast is safe to perform.
return static_cast<DWORD>(reinterpret_cast<uint64_t>(F(0)));
}
inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
@@ -1742,10 +1748,10 @@ void MicroProfileFlip()
}
}
}
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
{
pLog->nGroupTicks[i] += nGroupTicks[i];
pFrameGroup[i] += nGroupTicks[i];
pLog->nGroupTicks[j] += nGroupTicks[j];
pFrameGroup[j] += nGroupTicks[j];
}
pLog->nStackPos = nStackPos;
}
@@ -3328,7 +3334,7 @@ bool MicroProfileIsLocalThread(uint32_t nThreadId)
#endif
#else
bool MicroProfileIsLocalThread(uint32_t nThreadId){return false;}
bool MicroProfileIsLocalThread([[maybe_unused]] uint32_t nThreadId) { return false; }
void MicroProfileStopContextSwitchTrace(){}
void MicroProfileStartContextSwitchTrace(){}
@@ -3576,7 +3582,7 @@ int MicroProfileGetGpuTickReference(int64_t* pOutCpu, int64_t* pOutGpu)
#undef S
#ifdef _WIN32
#ifdef _MSC_VER
#pragma warning(pop)
#endif

1 externals/unicorn vendored

Submodule externals/unicorn deleted from 73f4573535

View File

@@ -46,7 +46,6 @@ if (MSVC)
# Warnings
/W3
/we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect
/we4548 # Expression before comma has no effect; expected expression with side-effect
/we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'?
/we4555 # Expression has no effect; expected expression with side-effect
/we4834 # Discarding return value of function with 'nodiscard' attribute

View File

@@ -190,6 +190,22 @@ if(ARCHITECTURE_x86_64)
)
endif()
if (MSVC)
target_compile_definitions(common PRIVATE
# The standard library doesn't provide any replacement for codecvt yet
# so we can disable this deprecation warning for the time being.
_SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
)
target_compile_options(common PRIVATE
/W4
/WX
)
else()
target_compile_options(common PRIVATE
-Werror
)
endif()
create_target_directory_groups(common)
find_package(Boost 1.71 COMPONENTS context headers REQUIRED)

View File

@@ -4,6 +4,8 @@
#include "common/assert.h"
#include "common/fiber.h"
#include "common/spin_lock.h"
#if defined(_WIN32) || defined(WIN32)
#include <windows.h>
#else
@@ -14,18 +16,45 @@ namespace Common {
constexpr std::size_t default_stack_size = 256 * 1024; // 256kb
#if defined(_WIN32) || defined(WIN32)
struct Fiber::FiberImpl {
SpinLock guard{};
std::function<void(void*)> entry_point;
std::function<void(void*)> rewind_point;
void* rewind_parameter{};
void* start_parameter{};
std::shared_ptr<Fiber> previous_fiber;
bool is_thread_fiber{};
bool released{};
#if defined(_WIN32) || defined(WIN32)
LPVOID handle = nullptr;
LPVOID rewind_handle = nullptr;
#else
alignas(64) std::array<u8, default_stack_size> stack;
alignas(64) std::array<u8, default_stack_size> rewind_stack;
u8* stack_limit;
u8* rewind_stack_limit;
boost::context::detail::fcontext_t context;
boost::context::detail::fcontext_t rewind_context;
#endif
};
void Fiber::SetStartParameter(void* new_parameter) {
impl->start_parameter = new_parameter;
}
void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param) {
impl->rewind_point = std::move(rewind_func);
impl->rewind_parameter = rewind_param;
}
#if defined(_WIN32) || defined(WIN32)
void Fiber::Start() {
ASSERT(previous_fiber != nullptr);
previous_fiber->guard.unlock();
previous_fiber.reset();
entry_point(start_parameter);
ASSERT(impl->previous_fiber != nullptr);
impl->previous_fiber->impl->guard.unlock();
impl->previous_fiber.reset();
impl->entry_point(impl->start_parameter);
UNREACHABLE();
}
@@ -34,58 +63,54 @@ void Fiber::OnRewind() {
DeleteFiber(impl->handle);
impl->handle = impl->rewind_handle;
impl->rewind_handle = nullptr;
rewind_point(rewind_parameter);
impl->rewind_point(impl->rewind_parameter);
UNREACHABLE();
}
void Fiber::FiberStartFunc(void* fiber_parameter) {
auto fiber = static_cast<Fiber*>(fiber_parameter);
auto* fiber = static_cast<Fiber*>(fiber_parameter);
fiber->Start();
}
void Fiber::RewindStartFunc(void* fiber_parameter) {
auto fiber = static_cast<Fiber*>(fiber_parameter);
auto* fiber = static_cast<Fiber*>(fiber_parameter);
fiber->OnRewind();
}
Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
: entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
impl = std::make_unique<FiberImpl>();
: impl{std::make_unique<FiberImpl>()} {
impl->entry_point = std::move(entry_point_func);
impl->start_parameter = start_parameter;
impl->handle = CreateFiber(default_stack_size, &FiberStartFunc, this);
}
Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {}
Fiber::~Fiber() {
if (released) {
if (impl->released) {
return;
}
// Make sure the Fiber is not being used
const bool locked = guard.try_lock();
const bool locked = impl->guard.try_lock();
ASSERT_MSG(locked, "Destroying a fiber that's still running");
if (locked) {
guard.unlock();
impl->guard.unlock();
}
DeleteFiber(impl->handle);
}
void Fiber::Exit() {
ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
if (!is_thread_fiber) {
ASSERT_MSG(impl->is_thread_fiber, "Exitting non main thread fiber");
if (!impl->is_thread_fiber) {
return;
}
ConvertFiberToThread();
guard.unlock();
released = true;
}
void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
rewind_point = std::move(rewind_func);
rewind_parameter = start_parameter;
impl->guard.unlock();
impl->released = true;
}
void Fiber::Rewind() {
ASSERT(rewind_point);
ASSERT(impl->rewind_point);
ASSERT(impl->rewind_handle == nullptr);
impl->rewind_handle = CreateFiber(default_stack_size, &RewindStartFunc, this);
SwitchToFiber(impl->rewind_handle);
@@ -94,39 +119,30 @@ void Fiber::Rewind() {
void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
ASSERT_MSG(to != nullptr, "Next fiber is null!");
to->guard.lock();
to->previous_fiber = from;
to->impl->guard.lock();
to->impl->previous_fiber = from;
SwitchToFiber(to->impl->handle);
ASSERT(from->previous_fiber != nullptr);
from->previous_fiber->guard.unlock();
from->previous_fiber.reset();
ASSERT(from->impl->previous_fiber != nullptr);
from->impl->previous_fiber->impl->guard.unlock();
from->impl->previous_fiber.reset();
}
std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
fiber->guard.lock();
fiber->impl->guard.lock();
fiber->impl->handle = ConvertThreadToFiber(nullptr);
fiber->is_thread_fiber = true;
fiber->impl->is_thread_fiber = true;
return fiber;
}
#else
struct Fiber::FiberImpl {
alignas(64) std::array<u8, default_stack_size> stack;
alignas(64) std::array<u8, default_stack_size> rewind_stack;
u8* stack_limit;
u8* rewind_stack_limit;
boost::context::detail::fcontext_t context;
boost::context::detail::fcontext_t rewind_context;
};
void Fiber::Start(boost::context::detail::transfer_t& transfer) {
ASSERT(previous_fiber != nullptr);
previous_fiber->impl->context = transfer.fctx;
previous_fiber->guard.unlock();
previous_fiber.reset();
entry_point(start_parameter);
ASSERT(impl->previous_fiber != nullptr);
impl->previous_fiber->impl->context = transfer.fctx;
impl->previous_fiber->impl->guard.unlock();
impl->previous_fiber.reset();
impl->entry_point(impl->start_parameter);
UNREACHABLE();
}
@@ -137,23 +153,24 @@ void Fiber::OnRewind([[maybe_unused]] boost::context::detail::transfer_t& transf
u8* tmp = impl->stack_limit;
impl->stack_limit = impl->rewind_stack_limit;
impl->rewind_stack_limit = tmp;
rewind_point(rewind_parameter);
impl->rewind_point(impl->rewind_parameter);
UNREACHABLE();
}
void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) {
auto fiber = static_cast<Fiber*>(transfer.data);
auto* fiber = static_cast<Fiber*>(transfer.data);
fiber->Start(transfer);
}
void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) {
auto fiber = static_cast<Fiber*>(transfer.data);
auto* fiber = static_cast<Fiber*>(transfer.data);
fiber->OnRewind(transfer);
}
Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
: entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
impl = std::make_unique<FiberImpl>();
: impl{std::make_unique<FiberImpl>()} {
impl->entry_point = std::move(entry_point_func);
impl->start_parameter = start_parameter;
impl->stack_limit = impl->stack.data();
impl->rewind_stack_limit = impl->rewind_stack.data();
u8* stack_base = impl->stack_limit + default_stack_size;
@@ -161,37 +178,31 @@ Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_paramete
boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc);
}
void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
rewind_point = std::move(rewind_func);
rewind_parameter = start_parameter;
}
Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {}
Fiber::~Fiber() {
if (released) {
if (impl->released) {
return;
}
// Make sure the Fiber is not being used
const bool locked = guard.try_lock();
const bool locked = impl->guard.try_lock();
ASSERT_MSG(locked, "Destroying a fiber that's still running");
if (locked) {
guard.unlock();
impl->guard.unlock();
}
}
void Fiber::Exit() {
ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
if (!is_thread_fiber) {
ASSERT_MSG(impl->is_thread_fiber, "Exitting non main thread fiber");
if (!impl->is_thread_fiber) {
return;
}
guard.unlock();
released = true;
impl->guard.unlock();
impl->released = true;
}
void Fiber::Rewind() {
ASSERT(rewind_point);
ASSERT(impl->rewind_point);
ASSERT(impl->rewind_context == nullptr);
u8* stack_base = impl->rewind_stack_limit + default_stack_size;
impl->rewind_context =
@@ -202,19 +213,19 @@ void Fiber::Rewind() {
void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
ASSERT_MSG(to != nullptr, "Next fiber is null!");
to->guard.lock();
to->previous_fiber = from;
to->impl->guard.lock();
to->impl->previous_fiber = from;
auto transfer = boost::context::detail::jump_fcontext(to->impl->context, to.get());
ASSERT(from->previous_fiber != nullptr);
from->previous_fiber->impl->context = transfer.fctx;
from->previous_fiber->guard.unlock();
from->previous_fiber.reset();
ASSERT(from->impl->previous_fiber != nullptr);
from->impl->previous_fiber->impl->context = transfer.fctx;
from->impl->previous_fiber->impl->guard.unlock();
from->impl->previous_fiber.reset();
}
std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
fiber->guard.lock();
fiber->is_thread_fiber = true;
fiber->impl->guard.lock();
fiber->impl->is_thread_fiber = true;
return fiber;
}

View File

@@ -7,9 +7,6 @@
#include <functional>
#include <memory>
#include "common/common_types.h"
#include "common/spin_lock.h"
#if !defined(_WIN32) && !defined(WIN32)
namespace boost::context::detail {
struct transfer_t;
@@ -49,7 +46,7 @@ public:
static void YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to);
[[nodiscard]] static std::shared_ptr<Fiber> ThreadToFiber();
void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter);
void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param);
void Rewind();
@@ -57,9 +54,7 @@ public:
void Exit();
/// Changes the start parameter of the fiber. Has no effect if the fiber already started
void SetStartParameter(void* new_parameter) {
start_parameter = new_parameter;
}
void SetStartParameter(void* new_parameter);
private:
Fiber();
@@ -77,16 +72,7 @@ private:
#endif
struct FiberImpl;
SpinLock guard{};
std::function<void(void*)> entry_point;
std::function<void(void*)> rewind_point;
void* rewind_parameter{};
void* start_parameter{};
std::shared_ptr<Fiber> previous_fiber;
std::unique_ptr<FiberImpl> impl;
bool is_thread_fiber{};
bool released{};
};
} // namespace Common

View File

@@ -472,13 +472,14 @@ u64 ScanDirectoryTree(const std::string& directory, FSTEntry& parent_entry,
}
bool DeleteDirRecursively(const std::string& directory, unsigned int recursion) {
const auto callback = [recursion](u64* num_entries_out, const std::string& directory,
const std::string& virtual_name) -> bool {
std::string new_path = directory + DIR_SEP_CHR + virtual_name;
const auto callback = [recursion](u64*, const std::string& directory,
const std::string& virtual_name) {
const std::string new_path = directory + DIR_SEP_CHR + virtual_name;
if (IsDirectory(new_path)) {
if (recursion == 0)
if (recursion == 0) {
return false;
}
return DeleteDirRecursively(new_path, recursion - 1);
}
return Delete(new_path);
@@ -492,7 +493,8 @@ bool DeleteDirRecursively(const std::string& directory, unsigned int recursion)
return true;
}
void CopyDir(const std::string& source_path, const std::string& dest_path) {
void CopyDir([[maybe_unused]] const std::string& source_path,
[[maybe_unused]] const std::string& dest_path) {
#ifndef _WIN32
if (source_path == dest_path) {
return;
@@ -553,7 +555,7 @@ std::optional<std::string> GetCurrentDir() {
std::string strDir = dir;
#endif
free(dir);
return std::move(strDir);
return strDir;
}
bool SetCurrentDir(const std::string& directory) {
@@ -772,21 +774,23 @@ std::size_t ReadFileToString(bool text_file, const std::string& filename, std::s
void SplitFilename83(const std::string& filename, std::array<char, 9>& short_name,
std::array<char, 4>& extension) {
const std::string forbidden_characters = ".\"/\\[]:;=, ";
static constexpr std::string_view forbidden_characters = ".\"/\\[]:;=, ";
// On a FAT32 partition, 8.3 names are stored as a 11 bytes array, filled with spaces.
short_name = {{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'}};
extension = {{' ', ' ', ' ', '\0'}};
std::string::size_type point = filename.rfind('.');
if (point == filename.size() - 1)
auto point = filename.rfind('.');
if (point == filename.size() - 1) {
point = filename.rfind('.', point);
}
// Get short name.
int j = 0;
for (char letter : filename.substr(0, point)) {
if (forbidden_characters.find(letter, 0) != std::string::npos)
if (forbidden_characters.find(letter, 0) != std::string::npos) {
continue;
}
if (j == 8) {
// TODO(Link Mauve): also do that for filenames containing a space.
// TODO(Link Mauve): handle multiple files having the same short name.
@@ -794,14 +798,15 @@ void SplitFilename83(const std::string& filename, std::array<char, 9>& short_nam
short_name[7] = '1';
break;
}
short_name[j++] = toupper(letter);
short_name[j++] = static_cast<char>(std::toupper(letter));
}
// Get extension.
if (point != std::string::npos) {
j = 0;
for (char letter : filename.substr(point + 1, 3))
extension[j++] = toupper(letter);
for (char letter : filename.substr(point + 1, 3)) {
extension[j++] = static_cast<char>(std::toupper(letter));
}
}
}

View File

@@ -274,7 +274,6 @@ const char* GetLogClassName(Class log_class) {
case Class::Count:
break;
}
UNREACHABLE();
return "Invalid";
}
@@ -293,7 +292,6 @@ const char* GetLevelName(Level log_level) {
break;
}
#undef LVL
UNREACHABLE();
return "Invalid";
}

View File

@@ -15,6 +15,14 @@ namespace Common {
*/
class SpinLock {
public:
SpinLock() = default;
SpinLock(const SpinLock&) = delete;
SpinLock& operator=(const SpinLock&) = delete;
SpinLock(SpinLock&&) = delete;
SpinLock& operator=(SpinLock&&) = delete;
void lock();
void unlock();
[[nodiscard]] bool try_lock();

View File

@@ -8,6 +8,7 @@
#include <cstdlib>
#include <locale>
#include <sstream>
#include "common/common_paths.h"
#include "common/logging/log.h"
#include "common/string_util.h"
@@ -21,14 +22,14 @@ namespace Common {
/// Make a string lowercase
std::string ToLower(std::string str) {
std::transform(str.begin(), str.end(), str.begin(),
[](unsigned char c) { return std::tolower(c); });
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
return str;
}
/// Make a string uppercase
std::string ToUpper(std::string str) {
std::transform(str.begin(), str.end(), str.begin(),
[](unsigned char c) { return std::toupper(c); });
[](unsigned char c) { return static_cast<char>(std::toupper(c)); });
return str;
}

View File

@@ -142,20 +142,18 @@ std::string Timer::GetTimeFormatted() {
// ----------------
double Timer::GetDoubleTime() {
// Get continuous timestamp
u64 TmpSeconds = static_cast<u64>(Common::Timer::GetTimeSinceJan1970().count());
double ms = static_cast<u64>(GetTimeMs().count()) % 1000;
auto tmp_seconds = static_cast<u64>(GetTimeSinceJan1970().count());
const auto ms = static_cast<double>(static_cast<u64>(GetTimeMs().count()) % 1000);
// Remove a few years. We only really want enough seconds to make
// sure that we are detecting actual actions, perhaps 60 seconds is
// enough really, but I leave a year of seconds anyway, in case the
// user's clock is incorrect or something like that.
TmpSeconds = TmpSeconds - (38 * 365 * 24 * 60 * 60);
tmp_seconds = tmp_seconds - (38 * 365 * 24 * 60 * 60);
// Make a smaller integer that fits in the double
u32 Seconds = static_cast<u32>(TmpSeconds);
double TmpTime = Seconds + ms;
return TmpTime;
const auto seconds = static_cast<u32>(tmp_seconds);
return seconds + ms;
}
} // Namespace Common

View File

@@ -53,7 +53,7 @@ public:
return Common::Divide128On32(temporary, 1000000000).first;
}
void Pause(bool is_paused) override {
void Pause([[maybe_unused]] bool is_paused) override {
// Do nothing in this clock type.
}

View File

@@ -34,7 +34,7 @@ private:
/// value used to reduce the native clocks accuracy as some apps rely on
/// undefined behavior where the level of accuracy in the clock shouldn't
/// be higher.
static constexpr u64 inaccuracy_mask = ~(0x400 - 1);
static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);
SpinLock rtsc_serialize{};
u64 last_measure{};

View File

@@ -13,8 +13,6 @@ add_library(core STATIC
arm/dynarmic/arm_exclusive_monitor.h
arm/exclusive_monitor.cpp
arm/exclusive_monitor.h
arm/unicorn/arm_unicorn.cpp
arm/unicorn/arm_unicorn.h
constants.cpp
constants.h
core.cpp
@@ -454,6 +452,8 @@ add_library(core STATIC
hle/service/nvdrv/nvdrv.h
hle/service/nvdrv/nvmemp.cpp
hle/service/nvdrv/nvmemp.h
hle/service/nvdrv/syncpoint_manager.cpp
hle/service/nvdrv/syncpoint_manager.h
hle/service/nvflinger/buffer_queue.cpp
hle/service/nvflinger/buffer_queue.h
hle/service/nvflinger/nvflinger.cpp
@@ -644,7 +644,7 @@ endif()
create_target_directory_groups(core)
target_link_libraries(core PUBLIC common PRIVATE audio_core video_core)
target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls opus unicorn zip)
target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls opus zip)
if (YUZU_ENABLE_BOXCAT)
target_compile_definitions(core PRIVATE -DYUZU_ENABLE_BOXCAT)

View File

@@ -147,10 +147,18 @@ std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktraceFromContex
auto fp = ctx.cpu_registers[29];
auto lr = ctx.cpu_registers[30];
while (true) {
out.push_back({"", 0, lr, 0});
if (!fp) {
out.push_back({
.module = "",
.address = 0,
.original_address = lr,
.offset = 0,
.name = {},
});
if (fp == 0) {
break;
}
lr = memory.Read64(fp + 8) - 4;
fp = memory.Read64(fp);
}

View File

@@ -21,8 +21,8 @@ public:
CPUInterruptHandler(const CPUInterruptHandler&) = delete;
CPUInterruptHandler& operator=(const CPUInterruptHandler&) = delete;
CPUInterruptHandler(CPUInterruptHandler&&) = default;
CPUInterruptHandler& operator=(CPUInterruptHandler&&) = default;
CPUInterruptHandler(CPUInterruptHandler&&) = delete;
CPUInterruptHandler& operator=(CPUInterruptHandler&&) = delete;
bool IsInterrupted() const {
return is_interrupted;

View File

@@ -7,6 +7,7 @@
#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/config.h>
#include <dynarmic/A32/context.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/page_table.h"
#include "core/arm/cpu_interrupt_handler.h"

View File

@@ -6,6 +6,7 @@
#include <memory>
#include <dynarmic/A64/a64.h>
#include <dynarmic/A64/config.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/page_table.h"
#include "core/arm/cpu_interrupt_handler.h"
@@ -13,7 +14,6 @@
#include "core/arm/dynarmic/arm_exclusive_monitor.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hardware_properties.h"
#include "core/hle/kernel/process.h"
@@ -82,16 +82,9 @@ public:
}
void InterpreterFallback(u64 pc, std::size_t num_instructions) override {
LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc,
num_instructions, MemoryReadCode(pc));
ARM_Interface::ThreadContext64 ctx;
parent.SaveContext(ctx);
parent.inner_unicorn.LoadContext(ctx);
parent.inner_unicorn.ExecuteInstructions(num_instructions);
parent.inner_unicorn.SaveContext(ctx);
parent.LoadContext(ctx);
num_interpreted_instructions += num_instructions;
LOG_ERROR(Core_ARM,
"Unimplemented instruction @ 0x{:X} for {} instructions (instr = {:08X})", pc,
num_instructions, MemoryReadCode(pc));
}
void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override {
@@ -127,18 +120,17 @@ public:
if (parent.uses_wall_clock) {
return;
}
// Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
// rough approximation of the amount of executed ticks in the system, it may be thrown off
// if not all cores are doing a similar amount of work. Instead of doing this, we should
// device a way so that timing is consistent across all cores without increasing the ticks 4
// times.
u64 amortized_ticks =
(ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;
u64 amortized_ticks = ticks / Core::Hardware::NUM_CPU_CORES;
// Always execute at least one tick.
amortized_ticks = std::max<u64>(amortized_ticks, 1);
parent.system.CoreTiming().AddTicks(amortized_ticks);
num_interpreted_instructions = 0;
}
u64 GetTicksRemaining() override {
@@ -156,7 +148,6 @@ public:
}
ARM_Dynarmic_64& parent;
std::size_t num_interpreted_instructions = 0;
u64 tpidrro_el0 = 0;
u64 tpidr_el0 = 0;
static constexpr u64 minimum_run_cycles = 1000U;
@@ -248,12 +239,8 @@ ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handle
bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index)
: ARM_Interface{system, interrupt_handlers, uses_wall_clock},
cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system, interrupt_handlers,
uses_wall_clock,
ARM_Unicorn::Arch::AArch64,
core_index},
core_index{core_index}, exclusive_monitor{
dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
cb(std::make_unique<DynarmicCallbacks64>(*this)), core_index{core_index},
exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
ARM_Dynarmic_64::~ARM_Dynarmic_64() = default;

View File

@@ -12,7 +12,6 @@
#include "common/hash.h"
#include "core/arm/arm_interface.h"
#include "core/arm/exclusive_monitor.h"
#include "core/arm/unicorn/arm_unicorn.h"
namespace Core::Memory {
class Memory;
@@ -71,7 +70,6 @@ private:
std::unique_ptr<DynarmicCallbacks64> cb;
JitCacheType jit_cache;
std::shared_ptr<Dynarmic::A64::Jit> jit;
ARM_Unicorn inner_unicorn;
std::size_t core_index;
DynarmicExclusiveMonitor& exclusive_monitor;

View File

@@ -1,295 +0,0 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <unicorn/arm64.h>
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/arm/cpu_interrupt_handler.h"
#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/svc.h"
#include "core/memory.h"
namespace Core {
// Load Unicorn DLL once on Windows using RAII
#ifdef _MSC_VER
#include <unicorn_dynload.h>
struct LoadDll {
private:
LoadDll() {
ASSERT(uc_dyn_load(NULL, 0));
}
~LoadDll() {
ASSERT(uc_dyn_free());
}
static LoadDll g_load_dll;
};
LoadDll LoadDll::g_load_dll;
#endif
#define CHECKED(expr) \
do { \
if (auto _cerr = (expr)) { \
ASSERT_MSG(false, "Call " #expr " failed with error: {} ({})\n", _cerr, \
uc_strerror(_cerr)); \
} \
} while (0)
static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_data) {
GDBStub::BreakpointAddress bkpt =
GDBStub::GetNextBreakpointFromAddress(address, GDBStub::BreakpointType::Execute);
if (GDBStub::IsMemoryBreak() ||
(bkpt.type != GDBStub::BreakpointType::None && address == bkpt.address)) {
auto core = static_cast<ARM_Unicorn*>(user_data);
core->RecordBreak(bkpt);
uc_emu_stop(uc);
}
}
static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
void* user_data) {
auto* const system = static_cast<System*>(user_data);
ARM_Interface::ThreadContext64 ctx{};
system->CurrentArmInterface().SaveContext(ctx);
ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
ctx.pc, ctx.cpu_registers[30]);
return false;
}
ARM_Unicorn::ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
Arch architecture, std::size_t core_index)
: ARM_Interface{system, interrupt_handlers, uses_wall_clock}, core_index{core_index} {
const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64;
CHECKED(uc_open(arch, UC_MODE_ARM, &uc));
auto fpv = 3 << 20;
CHECKED(uc_reg_write(uc, UC_ARM64_REG_CPACR_EL1, &fpv));
uc_hook hook{};
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, UINT64_MAX));
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, &system, 0,
UINT64_MAX));
if (GDBStub::IsServerEnabled()) {
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, UINT64_MAX));
last_bkpt_hit = false;
}
}
ARM_Unicorn::~ARM_Unicorn() {
CHECKED(uc_close(uc));
}
void ARM_Unicorn::SetPC(u64 pc) {
CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &pc));
}
u64 ARM_Unicorn::GetPC() const {
u64 val{};
CHECKED(uc_reg_read(uc, UC_ARM64_REG_PC, &val));
return val;
}
u64 ARM_Unicorn::GetReg(int regn) const {
u64 val{};
auto treg = UC_ARM64_REG_SP;
if (regn <= 28) {
treg = (uc_arm64_reg)(UC_ARM64_REG_X0 + regn);
} else if (regn < 31) {
treg = (uc_arm64_reg)(UC_ARM64_REG_X29 + regn - 29);
}
CHECKED(uc_reg_read(uc, treg, &val));
return val;
}
void ARM_Unicorn::SetReg(int regn, u64 val) {
auto treg = UC_ARM64_REG_SP;
if (regn <= 28) {
treg = (uc_arm64_reg)(UC_ARM64_REG_X0 + regn);
} else if (regn < 31) {
treg = (uc_arm64_reg)(UC_ARM64_REG_X29 + regn - 29);
}
CHECKED(uc_reg_write(uc, treg, &val));
}
u128 ARM_Unicorn::GetVectorReg(int /*index*/) const {
UNIMPLEMENTED();
static constexpr u128 res{};
return res;
}
void ARM_Unicorn::SetVectorReg(int /*index*/, u128 /*value*/) {
UNIMPLEMENTED();
}
u32 ARM_Unicorn::GetPSTATE() const {
u64 nzcv{};
CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &nzcv));
return static_cast<u32>(nzcv);
}
void ARM_Unicorn::SetPSTATE(u32 pstate) {
u64 nzcv = pstate;
CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &nzcv));
}
VAddr ARM_Unicorn::GetTlsAddress() const {
u64 base{};
CHECKED(uc_reg_read(uc, UC_ARM64_REG_TPIDRRO_EL0, &base));
return base;
}
void ARM_Unicorn::SetTlsAddress(VAddr base) {
CHECKED(uc_reg_write(uc, UC_ARM64_REG_TPIDRRO_EL0, &base));
}
u64 ARM_Unicorn::GetTPIDR_EL0() const {
u64 value{};
CHECKED(uc_reg_read(uc, UC_ARM64_REG_TPIDR_EL0, &value));
return value;
}
void ARM_Unicorn::SetTPIDR_EL0(u64 value) {
CHECKED(uc_reg_write(uc, UC_ARM64_REG_TPIDR_EL0, &value));
}
void ARM_Unicorn::ChangeProcessorID(std::size_t new_core_id) {
core_index = new_core_id;
}
void ARM_Unicorn::Run() {
if (GDBStub::IsServerEnabled()) {
ExecuteInstructions(std::max(4000000U, 0U));
} else {
while (true) {
if (interrupt_handlers[core_index].IsInterrupted()) {
return;
}
ExecuteInstructions(10);
}
}
}
void ARM_Unicorn::Step() {
ExecuteInstructions(1);
}
MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) {
MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
// Temporarily map the code page for Unicorn
u64 map_addr{GetPC() & ~Memory::PAGE_MASK};
std::vector<u8> page_buffer(Memory::PAGE_SIZE);
system.Memory().ReadBlock(map_addr, page_buffer.data(), page_buffer.size());
CHECKED(uc_mem_map_ptr(uc, map_addr, page_buffer.size(),
UC_PROT_READ | UC_PROT_WRITE | UC_PROT_EXEC, page_buffer.data()));
CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
CHECKED(uc_mem_unmap(uc, map_addr, page_buffer.size()));
if (GDBStub::IsServerEnabled()) {
if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) {
uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
}
Kernel::Thread* const thread = system.CurrentScheduler().GetCurrentThread();
SaveContext(thread->GetContext64());
if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) {
last_bkpt_hit = false;
GDBStub::Break();
GDBStub::SendTrap(thread, 5);
}
}
}
void ARM_Unicorn::SaveContext(ThreadContext64& ctx) {
int uregs[32];
void* tregs[32];
CHECKED(uc_reg_read(uc, UC_ARM64_REG_SP, &ctx.sp));
CHECKED(uc_reg_read(uc, UC_ARM64_REG_PC, &ctx.pc));
CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &ctx.pstate));
for (auto i = 0; i < 29; ++i) {
uregs[i] = UC_ARM64_REG_X0 + i;
tregs[i] = &ctx.cpu_registers[i];
}
uregs[29] = UC_ARM64_REG_X29;
tregs[29] = (void*)&ctx.cpu_registers[29];
uregs[30] = UC_ARM64_REG_X30;
tregs[30] = (void*)&ctx.cpu_registers[30];
CHECKED(uc_reg_read_batch(uc, uregs, tregs, 31));
for (int i = 0; i < 32; ++i) {
uregs[i] = UC_ARM64_REG_Q0 + i;
tregs[i] = &ctx.vector_registers[i];
}
CHECKED(uc_reg_read_batch(uc, uregs, tregs, 32));
}
void ARM_Unicorn::LoadContext(const ThreadContext64& ctx) {
int uregs[32];
void* tregs[32];
CHECKED(uc_reg_write(uc, UC_ARM64_REG_SP, &ctx.sp));
CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &ctx.pc));
CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &ctx.pstate));
for (int i = 0; i < 29; ++i) {
uregs[i] = UC_ARM64_REG_X0 + i;
tregs[i] = (void*)&ctx.cpu_registers[i];
}
uregs[29] = UC_ARM64_REG_X29;
tregs[29] = (void*)&ctx.cpu_registers[29];
uregs[30] = UC_ARM64_REG_X30;
tregs[30] = (void*)&ctx.cpu_registers[30];
CHECKED(uc_reg_write_batch(uc, uregs, tregs, 31));
for (auto i = 0; i < 32; ++i) {
uregs[i] = UC_ARM64_REG_Q0 + i;
tregs[i] = (void*)&ctx.vector_registers[i];
}
CHECKED(uc_reg_write_batch(uc, uregs, tregs, 32));
}
void ARM_Unicorn::PrepareReschedule() {
CHECKED(uc_emu_stop(uc));
}
void ARM_Unicorn::ClearExclusiveState() {}
void ARM_Unicorn::ClearInstructionCache() {}
void ARM_Unicorn::RecordBreak(GDBStub::BreakpointAddress bkpt) {
last_bkpt = bkpt;
last_bkpt_hit = true;
}
void ARM_Unicorn::InterruptHook(uc_engine* uc, u32 int_no, void* user_data) {
u32 esr{};
CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
const auto ec = esr >> 26;
const auto iss = esr & 0xFFFFFF;
auto* const arm_instance = static_cast<ARM_Unicorn*>(user_data);
switch (ec) {
case 0x15: // SVC
Kernel::Svc::Call(arm_instance->system, iss);
break;
}
}
} // namespace Core

View File

@@ -1,63 +0,0 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <unicorn/unicorn.h>
#include "common/common_types.h"
#include "core/arm/arm_interface.h"
#include "core/gdbstub/gdbstub.h"
namespace Core {
class System;
class ARM_Unicorn final : public ARM_Interface {
public:
enum class Arch {
AArch32, // 32-bit ARM
AArch64, // 64-bit ARM
};
explicit ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
Arch architecture, std::size_t core_index);
~ARM_Unicorn() override;
void SetPC(u64 pc) override;
u64 GetPC() const override;
u64 GetReg(int index) const override;
void SetReg(int index, u64 value) override;
u128 GetVectorReg(int index) const override;
void SetVectorReg(int index, u128 value) override;
u32 GetPSTATE() const override;
void SetPSTATE(u32 pstate) override;
VAddr GetTlsAddress() const override;
void SetTlsAddress(VAddr address) override;
void SetTPIDR_EL0(u64 value) override;
u64 GetTPIDR_EL0() const override;
void ChangeProcessorID(std::size_t new_core_id) override;
void PrepareReschedule() override;
void ClearExclusiveState() override;
void ExecuteInstructions(std::size_t num_instructions);
void Run() override;
void Step() override;
void ClearInstructionCache() override;
void PageTableChanged(Common::PageTable&, std::size_t) override {}
void RecordBreak(GDBStub::BreakpointAddress bkpt);
void SaveContext(ThreadContext32& ctx) override {}
void SaveContext(ThreadContext64& ctx) override;
void LoadContext(const ThreadContext32& ctx) override {}
void LoadContext(const ThreadContext64& ctx) override;
private:
static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data);
uc_engine* uc{};
GDBStub::BreakpointAddress last_bkpt{};
bool last_bkpt_hit = false;
std::size_t core_index;
};
} // namespace Core

View File

@@ -179,16 +179,18 @@ struct System::Impl {
arp_manager.ResetAll();
telemetry_session = std::make_unique<Core::TelemetrySession>();
gpu_core = VideoCore::CreateGPU(emu_window, system);
if (!gpu_core) {
return ResultStatus::ErrorVideoCore;
}
service_manager = std::make_shared<Service::SM::ServiceManager>(kernel);
Service::Init(service_manager, system);
GDBStub::DeferStart();
interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
gpu_core = VideoCore::CreateGPU(emu_window, system);
if (!gpu_core) {
return ResultStatus::ErrorVideoCore;
}
// Initialize time manager, which must happen after kernel is created
time_manager.Initialize();

View File

@@ -12,7 +12,6 @@
#include <utility>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "core/hle/ipc.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
@@ -73,14 +72,12 @@ public:
AlwaysMoveHandles = 1,
};
explicit ResponseBuilder(u32* command_buffer) : RequestHelperBase(command_buffer) {}
explicit ResponseBuilder(Kernel::HLERequestContext& context, u32 normal_params_size,
u32 num_handles_to_copy = 0, u32 num_objects_to_move = 0,
Flags flags = Flags::None)
: RequestHelperBase(context), normal_params_size(normal_params_size),
num_handles_to_copy(num_handles_to_copy), num_objects_to_move(num_objects_to_move) {
num_handles_to_copy(num_handles_to_copy),
num_objects_to_move(num_objects_to_move), kernel{context.kernel} {
memset(cmdbuf, 0, sizeof(u32) * IPC::COMMAND_BUFFER_LENGTH);
@@ -140,7 +137,6 @@ public:
if (context->Session()->IsDomain()) {
context->AddDomainObject(std::move(iface));
} else {
auto& kernel = Core::System::GetInstance().Kernel();
auto [client, server] = Kernel::Session::Create(kernel, iface->GetServiceName());
context->AddMoveObject(std::move(client));
iface->ClientConnected(std::move(server));
@@ -214,6 +210,7 @@ private:
u32 num_handles_to_copy{};
u32 num_objects_to_move{}; ///< Domain objects or move handles, context dependent
std::ptrdiff_t datapayload_index{};
Kernel::KernelCore& kernel;
};
/// Push ///

View File

@@ -24,6 +24,10 @@ namespace Core::Memory {
class Memory;
}
namespace IPC {
class ResponseBuilder;
}
namespace Service {
class ServiceFrameworkBase;
}
@@ -287,6 +291,8 @@ public:
}
private:
friend class IPC::ResponseBuilder;
void ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf, bool incoming);
std::array<u32, IPC::COMMAND_BUFFER_LENGTH> cmd_buf;

View File

@@ -2,30 +2,18 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/spin_lock.h"
#include "core/arm/arm_interface.h"
#ifdef ARCHITECTURE_x86_64
#include "core/arm/dynarmic/arm_dynarmic_32.h"
#include "core/arm/dynarmic/arm_dynarmic_64.h"
#endif
#include "core/arm/cpu_interrupt_handler.h"
#include "core/arm/exclusive_monitor.h"
#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core.h"
#include "core/hle/kernel/physical_core.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
namespace Kernel {
PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler,
Core::CPUInterruptHandler& interrupt_handler)
: interrupt_handler{interrupt_handler}, core_index{id}, scheduler{scheduler} {
guard = std::make_unique<Common::SpinLock>();
}
: interrupt_handler{interrupt_handler},
core_index{id}, scheduler{scheduler}, guard{std::make_unique<Common::SpinLock>()} {}
PhysicalCore::~PhysicalCore() = default;

View File

@@ -13,7 +13,6 @@
#include "common/logging/log.h"
#include "common/thread_queue_list.h"
#include "core/arm/arm_interface.h"
#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core.h"
#include "core/cpu_manager.h"
#include "core/hardware_properties.h"
@@ -217,8 +216,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
} else {
thread->tls_address = 0;
}
// TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
// to initialize the context
thread->arm_interface.reset();
if ((type_flags & THREADTYPE_HLE) == 0) {
#ifdef ARCHITECTURE_x86_64
@@ -231,19 +229,10 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(),
processor_id);
}
#else
if (owner_process && !owner_process->Is64BitProcess()) {
thread->arm_interface = std::make_shared<Core::ARM_Unicorn>(
system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch32,
processor_id);
} else {
thread->arm_interface = std::make_shared<Core::ARM_Unicorn>(
system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch64,
processor_id);
}
LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
#error Platform not supported yet.
#endif
ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top),
static_cast<u32>(entry_point), static_cast<u32>(arg));
ResetThreadContext64(thread->context_64, stack_top, entry_point, arg);

View File

@@ -11,6 +11,7 @@
#include "common/string_util.h"
#include "common/swap.h"
#include "core/constants.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/file_sys/control_metadata.h"
#include "core/file_sys/patch_manager.h"

View File

@@ -3,8 +3,8 @@
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/process.h"
#include "core/hle/service/am/am.h"
#include "core/hle/service/am/applet_ae.h"
#include "core/hle/service/nvflinger/nvflinger.h"

View File

@@ -6,6 +6,7 @@
#include <numeric>
#include <vector>
#include "common/logging/log.h"
#include "core/core.h"
#include "core/file_sys/content_archive.h"
#include "core/file_sys/control_metadata.h"
#include "core/file_sys/nca_metadata.h"

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/service/apm/apm.h"
#include "core/hle/service/apm/interface.h"

View File

@@ -8,6 +8,7 @@
#include "common/hex_util.h"
#include "common/logging/log.h"
#include "common/string_util.h"
#include "core/core.h"
#include "core/file_sys/vfs.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/process.h"

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/kernel/kernel.h"

View File

@@ -5,6 +5,7 @@
#include <memory>
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/kernel/kernel.h"

View File

@@ -5,6 +5,7 @@
#include <queue>
#include "common/logging/log.h"
#include "common/uuid.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/writable_event.h"

View File

@@ -5,6 +5,7 @@
#include <memory>
#include "common/logging/log.h"
#include "core/core.h"
#include "core/file_sys/control_metadata.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/hle_ipc.h"

View File

@@ -9,6 +9,7 @@
#include "common/alignment.h"
#include "common/hex_util.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/memory/page_table.h"
@@ -166,7 +167,7 @@ public:
{0, &RelocatableObject::LoadNro, "LoadNro"},
{1, &RelocatableObject::UnloadNro, "UnloadNro"},
{2, &RelocatableObject::LoadNrr, "LoadNrr"},
{3, nullptr, "UnloadNrr"},
{3, &RelocatableObject::UnloadNrr, "UnloadNrr"},
{4, &RelocatableObject::Initialize, "Initialize"},
{10, nullptr, "LoadNrrEx"},
};
@@ -272,6 +273,20 @@ public:
rb.Push(RESULT_SUCCESS);
}
void UnloadNrr(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto pid = rp.Pop<u64>();
const auto nrr_address = rp.Pop<VAddr>();
LOG_DEBUG(Service_LDR, "called with pid={}, nrr_address={:016X}", pid, nrr_address);
nrr.erase(nrr_address);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
bool ValidateRegionForMap(Kernel::Memory::PageTable& page_table, VAddr start,
std::size_t size) const {
constexpr std::size_t padding_size{4 * Kernel::Memory::PageSize};

View File

@@ -7,6 +7,7 @@
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/service/lm/lm.h"
#include "core/hle/service/lm/manager.h"

View File

@@ -24,25 +24,37 @@ public:
explicit nvdevice(Core::System& system) : system{system} {}
virtual ~nvdevice() = default;
union Ioctl {
u32_le raw;
BitField<0, 8, u32> cmd;
BitField<8, 8, u32> group;
BitField<16, 14, u32> length;
BitField<30, 1, u32> is_in;
BitField<31, 1, u32> is_out;
};
/**
* Handles an ioctl request.
* Handles an ioctl1 request.
* @param command The ioctl command id.
* @param input A buffer containing the input data for the ioctl.
* @param output A buffer where the output data will be written to.
* @returns The result code of the ioctl.
*/
virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) = 0;
virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) = 0;
/**
* Handles an ioctl2 request.
* @param command The ioctl command id.
* @param input A buffer containing the input data for the ioctl.
* @param inline_input A buffer containing the input data for the ioctl which has been inlined.
* @param output A buffer where the output data will be written to.
* @returns The result code of the ioctl.
*/
virtual NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) = 0;
/**
* Handles an ioctl3 request.
* @param command The ioctl command id.
* @param input A buffer containing the input data for the ioctl.
* @param output A buffer where the output data will be written to.
* @param inline_output A buffer where the inlined output data will be written to.
* @returns The result code of the ioctl.
*/
virtual NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) = 0;
protected:
Core::System& system;
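Note (illustrative sketch, not part of the diff): the raw command word carried by the guest decodes into the bitfields of the Ioctl union above, which is what lets the new Ioctl1/Ioctl2/Ioctl3 entry points dispatch on command.group and command.cmd instead of whole 32-bit constants. A standalone decode of the old IocAllocateSpaceCommand value 0xC0184102 (seen further below), using plain shifts that mirror the union's layout; the real code reads these through the BitField members instead.
#include <cstdint>
#include <cstdio>
int main() {
    const std::uint32_t raw = 0xC0184102;               // old full-word AllocateSpace constant
    const std::uint32_t cmd = raw & 0xFF;                // bits 0-7
    const std::uint32_t group = (raw >> 8) & 0xFF;       // bits 8-15
    const std::uint32_t length = (raw >> 16) & 0x3FFF;   // bits 16-29, payload size in bytes
    const bool is_in = ((raw >> 30) & 1) != 0;           // bit 30
    const bool is_out = ((raw >> 31) & 1) != 0;          // bit 31
    // Prints: group='A' cmd=0x2 length=0x18 in=1 out=1 -- note 0x18 == sizeof(IoctlAllocSpace)
    std::printf("group='%c' cmd=0x%X length=0x%X in=%d out=%d\n", static_cast<char>(group),
                static_cast<unsigned>(cmd), static_cast<unsigned>(length), is_in, is_out);
    return 0;
}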

View File

@@ -18,11 +18,22 @@ nvdisp_disp0::nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_de
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
nvdisp_disp0::~nvdisp_disp0() = default;
u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) {
UNIMPLEMENTED_MSG("Unimplemented ioctl");
return 0;
NvResult nvdisp_disp0::Ioctl1(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvdisp_disp0::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvdisp_disp0::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,

View File

@@ -20,9 +20,11 @@ public:
explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
~nvdisp_disp0() override;
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) override;
/// Performs a screen flip, drawing the buffer pointed to by the handle.
void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,

View File

@@ -17,59 +17,77 @@
namespace Service::Nvidia::Devices {
namespace NvErrCodes {
constexpr u32 Success{};
constexpr u32 OutOfMemory{static_cast<u32>(-12)};
constexpr u32 InvalidInput{static_cast<u32>(-22)};
} // namespace NvErrCodes
nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
nvhost_as_gpu::~nvhost_as_gpu() = default;
u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) {
LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
command.raw, input.size(), output.size());
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::IocInitalizeExCommand:
return InitalizeEx(input, output);
case IoctlCommand::IocAllocateSpaceCommand:
return AllocateSpace(input, output);
case IoctlCommand::IocMapBufferExCommand:
return MapBufferEx(input, output);
case IoctlCommand::IocBindChannelCommand:
return BindChannel(input, output);
case IoctlCommand::IocGetVaRegionsCommand:
return GetVARegions(input, output);
case IoctlCommand::IocUnmapBufferCommand:
return UnmapBuffer(input, output);
case IoctlCommand::IocFreeSpaceCommand:
return FreeSpace(input, output);
NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) {
switch (command.group) {
case 'A':
switch (command.cmd) {
case 0x1:
return BindChannel(input, output);
case 0x2:
return AllocateSpace(input, output);
case 0x3:
return FreeSpace(input, output);
case 0x5:
return UnmapBuffer(input, output);
case 0x6:
return MapBufferEx(input, output);
case 0x8:
return GetVARegions(input, output);
case 0x9:
return InitalizeEx(input, output);
case 0x14:
return Remap(input, output);
default:
break;
}
break;
default:
break;
}
if (static_cast<IoctlCommand>(command.cmd.Value()) == IoctlCommand::IocRemapCommand) {
return Remap(input, output);
}
UNIMPLEMENTED_MSG("Unimplemented ioctl command");
return 0;
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
u32 nvhost_as_gpu::InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_as_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_as_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) {
switch (command.group) {
case 'A':
switch (command.cmd) {
case 0x8:
return GetVARegions(input, output, inline_output);
default:
break;
}
break;
default:
break;
}
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_as_gpu::InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlInitalizeEx params{};
std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, big_page_size=0x{:X}", params.big_page_size);
return 0;
return NvResult::Success;
}
u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlAllocSpace params{};
std::memcpy(&params, input.data(), input.size());
@@ -83,17 +101,17 @@ u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>&
params.offset = system.GPU().MemoryManager().Allocate(size, params.align);
}
auto result{NvErrCodes::Success};
auto result = NvResult::Success;
if (!params.offset) {
LOG_CRITICAL(Service_NVDRV, "allocation failed for size {}", size);
result = NvErrCodes::OutOfMemory;
result = NvResult::InsufficientMemory;
}
std::memcpy(output.data(), &params, output.size());
return result;
}
u32 nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlFreeSpace params{};
std::memcpy(&params, input.data(), input.size());
@@ -104,15 +122,15 @@ u32 nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& outp
static_cast<std::size_t>(params.pages) * params.page_size);
std::memcpy(output.data(), &params, output.size());
return NvErrCodes::Success;
return NvResult::Success;
}
u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output) {
const auto num_entries = input.size() / sizeof(IoctlRemapEntry);
LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries);
auto result{NvErrCodes::Success};
auto result = NvResult::Success;
std::vector<IoctlRemapEntry> entries(num_entries);
std::memcpy(entries.data(), input.data(), input.size());
@@ -123,7 +141,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
const auto object{nvmap_dev->GetObject(entry.nvmap_handle)};
if (!object) {
LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", entry.nvmap_handle);
result = NvErrCodes::InvalidInput;
result = NvResult::InvalidState;
break;
}
@@ -134,7 +152,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
if (!addr) {
LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!");
result = NvErrCodes::InvalidInput;
result = NvResult::InvalidState;
break;
}
}
@@ -143,7 +161,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
return result;
}
u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBufferEx params{};
std::memcpy(&params, input.data(), input.size());
@@ -157,7 +175,7 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou
if (!object) {
LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", params.nvmap_handle);
std::memcpy(output.data(), &params, output.size());
return NvErrCodes::InvalidInput;
return NvResult::InvalidState;
}
// The real nvservices doesn't make a distinction between handles and ids, and
@@ -184,16 +202,16 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou
params.mapping_size, params.offset);
std::memcpy(output.data(), &params, output.size());
return NvErrCodes::InvalidInput;
return NvResult::InvalidState;
}
std::memcpy(output.data(), &params, output.size());
return NvErrCodes::Success;
return NvResult::Success;
} else {
LOG_CRITICAL(Service_NVDRV, "address not mapped offset={}", params.offset);
std::memcpy(output.data(), &params, output.size());
return NvErrCodes::InvalidInput;
return NvResult::InvalidState;
}
}
@@ -213,10 +231,10 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou
params.offset = gpu.MemoryManager().Map(physical_address, params.offset, size);
}
auto result{NvErrCodes::Success};
auto result = NvResult::Success;
if (!params.offset) {
LOG_CRITICAL(Service_NVDRV, "failed to map size={}", size);
result = NvErrCodes::InvalidInput;
result = NvResult::InvalidState;
} else {
AddBufferMap(params.offset, size, physical_address, is_alloc);
}
@@ -225,7 +243,7 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou
return result;
}
u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlUnmapBuffer params{};
std::memcpy(&params, input.data(), input.size());
@@ -238,20 +256,19 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
}
std::memcpy(output.data(), &params, output.size());
return NvErrCodes::Success;
return NvResult::Success;
}
u32 nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlBindChannel params{};
std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}", params.fd);
channel = params.fd;
return 0;
return NvResult::Success;
}
u32 nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetVaRegions params{};
std::memcpy(&params, input.data(), input.size());
@@ -270,7 +287,31 @@ u32 nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& o
// TODO(ogniK): This can probably stay stubbed, but support should be added much later
std::memcpy(output.data(), &params, output.size());
return 0;
return NvResult::Success;
}
NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) {
IoctlGetVaRegions params{};
std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
params.buf_size);
params.buf_size = 0x30;
params.regions[0].offset = 0x04000000;
params.regions[0].page_size = 0x1000;
params.regions[0].pages = 0x3fbfff;
params.regions[1].offset = 0x04000000;
params.regions[1].page_size = 0x10000;
params.regions[1].pages = 0x1bffff;
// TODO(ogniK): This can probably stay stubbed, but support should be added much later
std::memcpy(output.data(), &params, output.size());
std::memcpy(inline_output.data(), &params.regions, inline_output.size());
return NvResult::Success;
}
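Note (illustrative, not part of the diff): the 0x30 written back to buf_size above is just the size of the two fixed regions being reported. A minimal standalone compile-time check, mirroring the IoctlVaRegion layout declared in the device header further down (the padding word stands in for INSERT_PADDING_WORDS(1)):
#include <cstdint>
struct IoctlVaRegion {
    std::uint64_t offset;
    std::uint32_t page_size;
    std::uint32_t padding;
    std::uint64_t pages;
};
static_assert(sizeof(IoctlVaRegion) == 24, "IoctlVaRegion is incorrect size");
static_assert(2 * sizeof(IoctlVaRegion) == 0x30, "buf_size covers exactly two regions");
int main() {
    return 0;
}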
std::optional<nvhost_as_gpu::BufferMap> nvhost_as_gpu::FindBufferMap(GPUVAddr gpu_addr) const {

View File

@@ -30,9 +30,11 @@ public:
explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
~nvhost_as_gpu() override;
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) override;
private:
class BufferMap final {
@@ -74,32 +76,21 @@ private:
bool is_allocated{};
};
enum class IoctlCommand : u32_le {
IocInitalizeExCommand = 0x40284109,
IocAllocateSpaceCommand = 0xC0184102,
IocRemapCommand = 0x00000014,
IocMapBufferExCommand = 0xC0284106,
IocBindChannelCommand = 0x40044101,
IocGetVaRegionsCommand = 0xC0404108,
IocUnmapBufferCommand = 0xC0084105,
IocFreeSpaceCommand = 0xC0104103,
};
struct IoctlInitalizeEx {
u32_le big_page_size; // depends on GPU's available_big_page_sizes; 0=default
s32_le as_fd; // ignored; passes 0
u32_le flags; // passes 0
u32_le reserved; // ignored; passes 0
u64_le unk0;
u64_le unk1;
u64_le unk2;
u32_le big_page_size{}; // depends on GPU's available_big_page_sizes; 0=default
s32_le as_fd{}; // ignored; passes 0
u32_le flags{}; // passes 0
u32_le reserved{}; // ignored; passes 0
u64_le unk0{};
u64_le unk1{};
u64_le unk2{};
};
static_assert(sizeof(IoctlInitalizeEx) == 40, "IoctlInitalizeEx is incorrect size");
struct IoctlAllocSpace {
u32_le pages;
u32_le page_size;
AddressSpaceFlags flags;
u32_le pages{};
u32_le page_size{};
AddressSpaceFlags flags{};
INSERT_PADDING_WORDS(1);
union {
u64_le offset;
@@ -109,70 +100,73 @@ private:
static_assert(sizeof(IoctlAllocSpace) == 24, "IoctlAllocSpace is incorrect size");
struct IoctlFreeSpace {
u64_le offset;
u32_le pages;
u32_le page_size;
u64_le offset{};
u32_le pages{};
u32_le page_size{};
};
static_assert(sizeof(IoctlFreeSpace) == 16, "IoctlFreeSpace is incorrect size");
struct IoctlRemapEntry {
u16_le flags;
u16_le kind;
u32_le nvmap_handle;
u32_le map_offset;
u32_le offset;
u32_le pages;
u16_le flags{};
u16_le kind{};
u32_le nvmap_handle{};
u32_le map_offset{};
u32_le offset{};
u32_le pages{};
};
static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size");
struct IoctlMapBufferEx {
AddressSpaceFlags flags; // bit0: fixed_offset, bit2: cacheable
u32_le kind; // -1 is default
u32_le nvmap_handle;
u32_le page_size; // 0 means don't care
s64_le buffer_offset;
u64_le mapping_size;
s64_le offset;
AddressSpaceFlags flags{}; // bit0: fixed_offset, bit2: cacheable
u32_le kind{}; // -1 is default
u32_le nvmap_handle{};
u32_le page_size{}; // 0 means don't care
s64_le buffer_offset{};
u64_le mapping_size{};
s64_le offset{};
};
static_assert(sizeof(IoctlMapBufferEx) == 40, "IoctlMapBufferEx is incorrect size");
struct IoctlUnmapBuffer {
s64_le offset;
s64_le offset{};
};
static_assert(sizeof(IoctlUnmapBuffer) == 8, "IoctlUnmapBuffer is incorrect size");
struct IoctlBindChannel {
u32_le fd;
s32_le fd{};
};
static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size");
struct IoctlVaRegion {
u64_le offset;
u32_le page_size;
u64_le offset{};
u32_le page_size{};
INSERT_PADDING_WORDS(1);
u64_le pages;
u64_le pages{};
};
static_assert(sizeof(IoctlVaRegion) == 24, "IoctlVaRegion is incorrect size");
struct IoctlGetVaRegions {
u64_le buf_addr; // (contained output user ptr on linux, ignored)
u32_le buf_size; // forced to 2*sizeof(struct va_region)
u32_le reserved;
IoctlVaRegion regions[2];
u64_le buf_addr{}; // (contained output user ptr on linux, ignored)
u32_le buf_size{}; // forced to 2*sizeof(struct va_region)
u32_le reserved{};
IoctlVaRegion regions[2]{};
};
static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2,
"IoctlGetVaRegions is incorrect size");
u32 channel{};
s32 channel{};
u32 InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output);
u32 AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output);
u32 Remap(const std::vector<u8>& input, std::vector<u8>& output);
u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
u32 UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
u32 FreeSpace(const std::vector<u8>& input, std::vector<u8>& output);
u32 BindChannel(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetVARegions(const std::vector<u8>& input, std::vector<u8>& output);
NvResult InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output);
NvResult AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output);
NvResult Remap(const std::vector<u8>& input, std::vector<u8>& output);
NvResult MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
NvResult UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
NvResult FreeSpace(const std::vector<u8>& input, std::vector<u8>& output);
NvResult BindChannel(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output);
std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const;
void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);

View File

@@ -15,45 +15,59 @@
namespace Service::Nvidia::Devices {
nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface)
: nvdevice(system), events_interface{events_interface} {}
nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface,
SyncpointManager& syncpoint_manager)
: nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {}
nvhost_ctrl::~nvhost_ctrl() = default;
u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) {
LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
command.raw, input.size(), output.size());
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::IocGetConfigCommand:
return NvOsGetConfigU32(input, output);
case IoctlCommand::IocCtrlEventWaitCommand:
return IocCtrlEventWait(input, output, false, ctrl);
case IoctlCommand::IocCtrlEventWaitAsyncCommand:
return IocCtrlEventWait(input, output, true, ctrl);
case IoctlCommand::IocCtrlEventRegisterCommand:
return IocCtrlEventRegister(input, output);
case IoctlCommand::IocCtrlEventUnregisterCommand:
return IocCtrlEventUnregister(input, output);
case IoctlCommand::IocCtrlEventSignalCommand:
return IocCtrlEventSignal(input, output);
NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
switch (command.group) {
case 0x0:
switch (command.cmd) {
case 0x1b:
return NvOsGetConfigU32(input, output);
case 0x1c:
return IocCtrlClearEventWait(input, output);
case 0x1d:
return IocCtrlEventWait(input, output, false);
case 0x1e:
return IocCtrlEventWait(input, output, true);
case 0x1f:
return IocCtrlEventRegister(input, output);
case 0x20:
return IocCtrlEventUnregister(input, output);
}
break;
default:
UNIMPLEMENTED_MSG("Unimplemented ioctl");
return 0;
break;
}
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
u32 nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_ctrl::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_ctrl::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output) {
IocGetConfigParams params{};
std::memcpy(&params, input.data(), sizeof(params));
LOG_TRACE(Service_NVDRV, "called, setting={}!{}", params.domain_str.data(),
params.param_str.data());
return 0x30006; // Returns error on production mode
return NvResult::ConfigVarNotFound; // Returns error on production mode
}
u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output,
bool is_async, IoctlCtrl& ctrl) {
NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output,
bool is_async) {
IocCtrlEventWaitParams params{};
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}",
@@ -70,19 +84,33 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
return NvResult::BadParameter;
}
if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id);
std::memcpy(output.data(), &params, sizeof(params));
return NvResult::Success;
}
if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id);
syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
params.value = new_value;
std::memcpy(output.data(), &params, sizeof(params));
return NvResult::Success;
}
auto event = events_interface.events[event_id];
auto& gpu = system.GPU();
// This is mostly to account for unimplemented features, as a synced
// GPU is always synced.
if (!gpu.IsAsync()) {
event.writable->Signal();
event.event.writable->Signal();
return NvResult::Success;
}
auto lock = gpu.LockSync();
const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id);
const u32 current_syncpoint_value = event.fence.value;
const s32 diff = current_syncpoint_value - params.threshold;
if (diff >= 0) {
event.writable->Signal();
event.event.writable->Signal();
params.value = current_syncpoint_value;
std::memcpy(output.data(), &params, sizeof(params));
return NvResult::Success;
@@ -109,12 +137,9 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
}
params.value |= event_id;
event.writable->Clear();
event.event.writable->Clear();
gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
if (!is_async && ctrl.fresh_call) {
ctrl.must_delay = true;
ctrl.timeout = params.timeout;
ctrl.event_id = event_id;
if (!is_async) {
return NvResult::Timeout;
}
std::memcpy(output.data(), &params, sizeof(params));
@@ -124,7 +149,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
return NvResult::BadParameter;
}
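Note (illustrative, not part of the diff): the signed-difference test above (diff = current - threshold as s32, signal when diff >= 0) keeps the expiry check correct even when the u32 syncpoint counter wraps around. A minimal standalone sketch of the same idea, with hypothetical values:
#include <cassert>
#include <cstdint>
int main() {
    const std::uint32_t threshold = 0xFFFFFFF0;
    const std::uint32_t current = 0x00000010; // counter has wrapped past the threshold
    const std::int32_t diff = static_cast<std::int32_t>(current - threshold);
    assert(diff >= 0); // the fence is treated as signalled despite the wrap
    return 0;
}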
u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) {
IocCtrlEventRegisterParams params{};
std::memcpy(&params, input.data(), sizeof(params));
const u32 event_id = params.user_event_id & 0x00FF;
@@ -139,7 +164,8 @@ u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<
return NvResult::Success;
}
u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input,
std::vector<u8>& output) {
IocCtrlEventUnregisterParams params{};
std::memcpy(&params, input.data(), sizeof(params));
const u32 event_id = params.user_event_id & 0x00FF;
@@ -154,24 +180,22 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto
return NvResult::Success;
}
u32 nvhost_ctrl::IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
IocCtrlEventSignalParams params{};
std::memcpy(&params, input.data(), sizeof(params));
// TODO(Blinkhawk): This is normally called when an NvEvent times out on WaitSynchronization.
// It is believed from RE to cancel the GPU event; however, better research is required.
u32 event_id = params.user_event_id & 0x00FF;
LOG_WARNING(Service_NVDRV, "(STUBBED) called, user_event_id: {:X}", event_id);
u32 event_id = params.event_id & 0x00FF;
LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id);
if (event_id >= MaxNvEvents) {
return NvResult::BadParameter;
}
if (events_interface.status[event_id] == EventState::Waiting) {
auto& gpu = system.GPU();
if (gpu.CancelSyncptInterrupt(events_interface.assigned_syncpt[event_id],
events_interface.assigned_value[event_id])) {
events_interface.LiberateEvent(event_id);
events_interface.events[event_id].writable->Signal();
}
events_interface.LiberateEvent(event_id);
}
syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id);
return NvResult::Success;
}

View File

@@ -14,137 +14,120 @@ namespace Service::Nvidia::Devices {
class nvhost_ctrl final : public nvdevice {
public:
explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface);
explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface,
SyncpointManager& syncpoint_manager);
~nvhost_ctrl() override;
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) override;
private:
enum class IoctlCommand : u32_le {
IocSyncptReadCommand = 0xC0080014,
IocSyncptIncrCommand = 0x40040015,
IocSyncptWaitCommand = 0xC00C0016,
IocModuleMutexCommand = 0x40080017,
IocModuleRegRDWRCommand = 0xC0180018,
IocSyncptWaitexCommand = 0xC0100019,
IocSyncptReadMaxCommand = 0xC008001A,
IocGetConfigCommand = 0xC183001B,
IocCtrlEventSignalCommand = 0xC004001C,
IocCtrlEventWaitCommand = 0xC010001D,
IocCtrlEventWaitAsyncCommand = 0xC010001E,
IocCtrlEventRegisterCommand = 0xC004001F,
IocCtrlEventUnregisterCommand = 0xC0040020,
IocCtrlEventKillCommand = 0x40080021,
};
struct IocSyncptReadParams {
u32_le id;
u32_le value;
u32_le id{};
u32_le value{};
};
static_assert(sizeof(IocSyncptReadParams) == 8, "IocSyncptReadParams is incorrect size");
struct IocSyncptIncrParams {
u32_le id;
u32_le id{};
};
static_assert(sizeof(IocSyncptIncrParams) == 4, "IocSyncptIncrParams is incorrect size");
struct IocSyncptWaitParams {
u32_le id;
u32_le thresh;
s32_le timeout;
u32_le id{};
u32_le thresh{};
s32_le timeout{};
};
static_assert(sizeof(IocSyncptWaitParams) == 12, "IocSyncptWaitParams is incorrect size");
struct IocModuleMutexParams {
u32_le id;
u32_le lock; // (0 = unlock and 1 = lock)
u32_le id{};
u32_le lock{}; // (0 = unlock and 1 = lock)
};
static_assert(sizeof(IocModuleMutexParams) == 8, "IocModuleMutexParams is incorrect size");
struct IocModuleRegRDWRParams {
u32_le id;
u32_le num_offsets;
u32_le block_size;
u32_le offsets;
u32_le values;
u32_le write;
u32_le id{};
u32_le num_offsets{};
u32_le block_size{};
u32_le offsets{};
u32_le values{};
u32_le write{};
};
static_assert(sizeof(IocModuleRegRDWRParams) == 24, "IocModuleRegRDWRParams is incorrect size");
struct IocSyncptWaitexParams {
u32_le id;
u32_le thresh;
s32_le timeout;
u32_le value;
u32_le id{};
u32_le thresh{};
s32_le timeout{};
u32_le value{};
};
static_assert(sizeof(IocSyncptWaitexParams) == 16, "IocSyncptWaitexParams is incorrect size");
struct IocSyncptReadMaxParams {
u32_le id;
u32_le value;
u32_le id{};
u32_le value{};
};
static_assert(sizeof(IocSyncptReadMaxParams) == 8, "IocSyncptReadMaxParams is incorrect size");
struct IocGetConfigParams {
std::array<char, 0x41> domain_str;
std::array<char, 0x41> param_str;
std::array<char, 0x101> config_str;
std::array<char, 0x41> domain_str{};
std::array<char, 0x41> param_str{};
std::array<char, 0x101> config_str{};
};
static_assert(sizeof(IocGetConfigParams) == 387, "IocGetConfigParams is incorrect size");
struct IocCtrlEventSignalParams {
u32_le user_event_id;
u32_le event_id{};
};
static_assert(sizeof(IocCtrlEventSignalParams) == 4,
"IocCtrlEventSignalParams is incorrect size");
struct IocCtrlEventWaitParams {
u32_le syncpt_id;
u32_le threshold;
s32_le timeout;
u32_le value;
u32_le syncpt_id{};
u32_le threshold{};
s32_le timeout{};
u32_le value{};
};
static_assert(sizeof(IocCtrlEventWaitParams) == 16, "IocCtrlEventWaitParams is incorrect size");
struct IocCtrlEventWaitAsyncParams {
u32_le syncpt_id;
u32_le threshold;
u32_le timeout;
u32_le value;
u32_le syncpt_id{};
u32_le threshold{};
u32_le timeout{};
u32_le value{};
};
static_assert(sizeof(IocCtrlEventWaitAsyncParams) == 16,
"IocCtrlEventWaitAsyncParams is incorrect size");
struct IocCtrlEventRegisterParams {
u32_le user_event_id;
u32_le user_event_id{};
};
static_assert(sizeof(IocCtrlEventRegisterParams) == 4,
"IocCtrlEventRegisterParams is incorrect size");
struct IocCtrlEventUnregisterParams {
u32_le user_event_id;
u32_le user_event_id{};
};
static_assert(sizeof(IocCtrlEventUnregisterParams) == 4,
"IocCtrlEventUnregisterParams is incorrect size");
struct IocCtrlEventKill {
u64_le user_events;
u64_le user_events{};
};
static_assert(sizeof(IocCtrlEventKill) == 8, "IocCtrlEventKill is incorrect size");
u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async,
IoctlCtrl& ctrl);
u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output);
NvResult NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async);
NvResult IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);
EventInterface& events_interface;
SyncpointManager& syncpoint_manager;
};
} // namespace Service::Nvidia::Devices

View File

@@ -15,39 +15,112 @@ namespace Service::Nvidia::Devices {
nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system) : nvdevice(system) {}
nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default;
u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& input2, std::vector<u8>& output,
std::vector<u8>& output2, IoctlCtrl& ctrl, IoctlVersion version) {
LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
command.raw, input.size(), output.size());
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::IocGetCharacteristicsCommand:
return GetCharacteristics(input, output, output2, version);
case IoctlCommand::IocGetTPCMasksCommand:
return GetTPCMasks(input, output, output2, version);
case IoctlCommand::IocGetActiveSlotMaskCommand:
return GetActiveSlotMask(input, output);
case IoctlCommand::IocZcullGetCtxSizeCommand:
return ZCullGetCtxSize(input, output);
case IoctlCommand::IocZcullGetInfo:
return ZCullGetInfo(input, output);
case IoctlCommand::IocZbcSetTable:
return ZBCSetTable(input, output);
case IoctlCommand::IocZbcQueryTable:
return ZBCQueryTable(input, output);
case IoctlCommand::IocFlushL2:
return FlushL2(input, output);
case IoctlCommand::IocGetGpuTime:
return GetGpuTime(input, output);
default:
UNIMPLEMENTED_MSG("Unimplemented ioctl");
return 0;
NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) {
switch (command.group) {
case 'G':
switch (command.cmd) {
case 0x1:
return ZCullGetCtxSize(input, output);
case 0x2:
return ZCullGetInfo(input, output);
case 0x3:
return ZBCSetTable(input, output);
case 0x4:
return ZBCQueryTable(input, output);
case 0x5:
return GetCharacteristics(input, output);
case 0x6:
return GetTPCMasks(input, output);
case 0x7:
return FlushL2(input, output);
case 0x14:
return GetActiveSlotMask(input, output);
case 0x1c:
return GetGpuTime(input, output);
default:
break;
}
break;
}
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& output2, IoctlVersion version) {
NvResult nvhost_ctrl_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_ctrl_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output, std::vector<u8>& inline_output) {
switch (command.group) {
case 'G':
switch (command.cmd) {
case 0x5:
return GetCharacteristics(input, output, inline_output);
case 0x6:
return GetTPCMasks(input, output, inline_output);
default:
break;
}
break;
default:
break;
}
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input,
std::vector<u8>& output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlCharacteristics params{};
std::memcpy(&params, input.data(), input.size());
params.gc.arch = 0x120;
params.gc.impl = 0xb;
params.gc.rev = 0xa1;
params.gc.num_gpc = 0x1;
params.gc.l2_cache_size = 0x40000;
params.gc.on_board_video_memory_size = 0x0;
params.gc.num_tpc_per_gpc = 0x2;
params.gc.bus_type = 0x20;
params.gc.big_page_size = 0x20000;
params.gc.compression_page_size = 0x20000;
params.gc.pde_coverage_bit_count = 0x1B;
params.gc.available_big_page_sizes = 0x30000;
params.gc.gpc_mask = 0x1;
params.gc.sm_arch_sm_version = 0x503;
params.gc.sm_arch_spa_version = 0x503;
params.gc.sm_arch_warp_count = 0x80;
params.gc.gpu_va_bit_count = 0x28;
params.gc.reserved = 0x0;
params.gc.flags = 0x55;
params.gc.twod_class = 0x902D;
params.gc.threed_class = 0xB197;
params.gc.compute_class = 0xB1C0;
params.gc.gpfifo_class = 0xB06F;
params.gc.inline_to_memory_class = 0xA140;
params.gc.dma_copy_class = 0xB0B5;
params.gc.max_fbps_count = 0x1;
params.gc.fbp_en_mask = 0x0;
params.gc.max_ltc_per_fbp = 0x2;
params.gc.max_lts_per_ltc = 0x1;
params.gc.max_tex_per_tpc = 0x0;
params.gc.max_gpc_count = 0x1;
params.gc.rop_l2_en_mask_0 = 0x21D70;
params.gc.rop_l2_en_mask_1 = 0x0;
params.gc.chipname = 0x6230326D67;
params.gc.gr_compbit_store_base_hw = 0x0;
params.gpu_characteristics_buf_size = 0xA0;
params.gpu_characteristics_buf_addr = 0xdeadbeef; // Cannot be 0 (UNUSED)
std::memcpy(output.data(), &params, output.size());
return NvResult::Success;
}
NvResult nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlCharacteristics params{};
std::memcpy(&params, input.data(), input.size());
@@ -89,35 +162,36 @@ u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vecto
params.gpu_characteristics_buf_size = 0xA0;
params.gpu_characteristics_buf_addr = 0xdeadbeef; // Cannot be 0 (UNUSED)
if (version == IoctlVersion::Version3) {
std::memcpy(output.data(), input.data(), output.size());
std::memcpy(output2.data(), &params.gc, output2.size());
} else {
std::memcpy(output.data(), &params, output.size());
}
return 0;
std::memcpy(output.data(), input.data(), output.size());
std::memcpy(inline_output.data(), &params.gc, inline_output.size());
return NvResult::Success;
}
u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& output2, IoctlVersion version) {
NvResult nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGpuGetTpcMasksArgs params{};
std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
if (params.mask_buffer_size != 0) {
params.tcp_mask = 3;
}
if (version == IoctlVersion::Version3) {
std::memcpy(output.data(), input.data(), output.size());
std::memcpy(output2.data(), &params.tcp_mask, output2.size());
} else {
std::memcpy(output.data(), &params, output.size());
}
return 0;
std::memcpy(output.data(), &params, output.size());
return NvResult::Success;
}
u32 nvhost_ctrl_gpu::GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) {
IoctlGpuGetTpcMasksArgs params{};
std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
if (params.mask_buffer_size != 0) {
params.tcp_mask = 3;
}
std::memcpy(output.data(), &params, output.size());
std::memcpy(inline_output.data(), &params.tcp_mask, inline_output.size());
return NvResult::Success;
}
NvResult nvhost_ctrl_gpu::GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlActiveSlotMask params{};
@@ -127,10 +201,10 @@ u32 nvhost_ctrl_gpu::GetActiveSlotMask(const std::vector<u8>& input, std::vector
params.slot = 0x07;
params.mask = 0x01;
std::memcpy(output.data(), &params, output.size());
return 0;
return NvResult::Success;
}
u32 nvhost_ctrl_gpu::ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlZcullGetCtxSize params{};
@@ -139,10 +213,10 @@ u32 nvhost_ctrl_gpu::ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u
}
params.size = 0x1;
std::memcpy(output.data(), &params, output.size());
return 0;
return NvResult::Success;
}
u32 nvhost_ctrl_gpu::ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_ctrl_gpu::ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlNvgpuGpuZcullGetInfoArgs params{};
@@ -162,47 +236,47 @@ u32 nvhost_ctrl_gpu::ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>&
params.subregion_height_align_pixels = 0x40;
params.subregion_count = 0x10;
std::memcpy(output.data(), &params, output.size());
return 0;
return NvResult::Success;
}
u32 nvhost_ctrl_gpu::ZBCSetTable(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_ctrl_gpu::ZBCSetTable(const std::vector<u8>& input, std::vector<u8>& output) {
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
IoctlZbcSetTable params{};
std::memcpy(&params, input.data(), input.size());
// TODO(ogniK): What does this even actually do?
std::memcpy(output.data(), &params, output.size());
return 0;
return NvResult::Success;
}
u32 nvhost_ctrl_gpu::ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_ctrl_gpu::ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output) {
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
IoctlZbcQueryTable params{};
std::memcpy(&params, input.data(), input.size());
// TODO: Implement properly
std::memcpy(output.data(), &params, output.size());
return 0;
return NvResult::Success;
}
u32 nvhost_ctrl_gpu::FlushL2(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_ctrl_gpu::FlushL2(const std::vector<u8>& input, std::vector<u8>& output) {
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
IoctlFlushL2 params{};
std::memcpy(&params, input.data(), input.size());
// TODO: Implement properly
std::memcpy(output.data(), &params, output.size());
return 0;
return NvResult::Success;
}
u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlGetGpuTime params{};
std::memcpy(&params, input.data(), input.size());
params.gpu_time = static_cast<u64_le>(system.CoreTiming().GetGlobalTimeNs().count());
std::memcpy(output.data(), &params, output.size());
return 0;
return NvResult::Success;
}
} // namespace Service::Nvidia::Devices

View File

@@ -16,32 +16,13 @@ public:
explicit nvhost_ctrl_gpu(Core::System& system);
~nvhost_ctrl_gpu() override;
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) override;
private:
enum class IoctlCommand : u32_le {
IocGetCharacteristicsCommand = 0xC0B04705,
IocGetTPCMasksCommand = 0xC0184706,
IocGetActiveSlotMaskCommand = 0x80084714,
IocZcullGetCtxSizeCommand = 0x80044701,
IocZcullGetInfo = 0x80284702,
IocZbcSetTable = 0x402C4703,
IocZbcQueryTable = 0xC0344704,
IocFlushL2 = 0x40084707,
IocInvalICache = 0x4008470D,
IocSetMmudebugMode = 0x4008470E,
IocSetSmDebugMode = 0x4010470F,
IocWaitForPause = 0xC0084710,
IocGetTcpExceptionEnStatus = 0x80084711,
IocNumVsms = 0x80084712,
IocVsmsMapping = 0xC0044713,
IocGetErrorChannelUserData = 0xC008471B,
IocGetGpuTime = 0xC010471C,
IocGetCpuTimeCorrelationInfo = 0xC108471D,
};
struct IoctlGpuCharacteristics {
u32_le arch; // 0x120 (NVGPU_GPU_ARCH_GM200)
u32_le impl; // 0xB (NVGPU_GPU_IMPL_GM20B)
@@ -159,17 +140,21 @@ private:
};
static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size");
u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& output2, IoctlVersion version);
u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output, std::vector<u8>& output2,
IoctlVersion version);
u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
u32 ZBCSetTable(const std::vector<u8>& input, std::vector<u8>& output);
u32 ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output);
u32 FlushL2(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetGpuTime(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output);
NvResult GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output);
NvResult GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
NvResult ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
NvResult ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
NvResult ZBCSetTable(const std::vector<u8>& input, std::vector<u8>& output);
NvResult ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output);
NvResult FlushL2(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetGpuTime(const std::vector<u8>& input, std::vector<u8>& output);
};
} // namespace Service::Nvidia::Devices

View File

@@ -7,117 +7,148 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Service::Nvidia::Devices {
nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
SyncpointManager& syncpoint_manager)
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} {
channel_fence.id = syncpoint_manager.AllocateSyncpoint();
channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
}
nvhost_gpu::~nvhost_gpu() = default;
u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) {
LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
command.raw, input.size(), output.size());
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::IocSetNVMAPfdCommand:
return SetNVMAPfd(input, output);
case IoctlCommand::IocSetClientDataCommand:
return SetClientData(input, output);
case IoctlCommand::IocGetClientDataCommand:
return GetClientData(input, output);
case IoctlCommand::IocZCullBind:
return ZCullBind(input, output);
case IoctlCommand::IocSetErrorNotifierCommand:
return SetErrorNotifier(input, output);
case IoctlCommand::IocChannelSetPriorityCommand:
return SetChannelPriority(input, output);
case IoctlCommand::IocAllocGPFIFOEx2Command:
return AllocGPFIFOEx2(input, output);
case IoctlCommand::IocAllocObjCtxCommand:
return AllocateObjectContext(input, output);
case IoctlCommand::IocChannelGetWaitbaseCommand:
return GetWaitbase(input, output);
case IoctlCommand::IocChannelSetTimeoutCommand:
return ChannelSetTimeout(input, output);
case IoctlCommand::IocChannelSetTimeslice:
return ChannelSetTimeslice(input, output);
default:
NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
switch (command.group) {
case 0x0:
switch (command.cmd) {
case 0x3:
return GetWaitbase(input, output);
default:
break;
}
break;
case 'H':
switch (command.cmd) {
case 0x1:
return SetNVMAPfd(input, output);
case 0x3:
return ChannelSetTimeout(input, output);
case 0x8:
return SubmitGPFIFOBase(input, output, false);
case 0x9:
return AllocateObjectContext(input, output);
case 0xb:
return ZCullBind(input, output);
case 0xc:
return SetErrorNotifier(input, output);
case 0xd:
return SetChannelPriority(input, output);
case 0x1a:
return AllocGPFIFOEx2(input, output);
case 0x1b:
return SubmitGPFIFOBase(input, output, true);
case 0x1d:
return ChannelSetTimeslice(input, output);
default:
break;
}
break;
case 'G':
switch (command.cmd) {
case 0x14:
return SetClientData(input, output);
case 0x15:
return GetClientData(input, output);
default:
break;
}
break;
}
if (command.group == NVGPU_IOCTL_MAGIC) {
if (command.cmd == NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO) {
return SubmitGPFIFO(input, output);
}
if (command.cmd == NVGPU_IOCTL_CHANNEL_KICKOFF_PB) {
return KickoffPB(input, output, input2, version);
}
}
UNIMPLEMENTED_MSG("Unimplemented ioctl");
return 0;
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
};
u32 nvhost_gpu::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) {
switch (command.group) {
case 'H':
switch (command.cmd) {
case 0x1b:
return SubmitGPFIFOBase(input, inline_input, output);
}
break;
}
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_gpu::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSetNvmapFD params{};
std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
nvmap_fd = params.nvmap_fd;
return 0;
return NvResult::Success;
}
u32 nvhost_gpu::SetClientData(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_gpu::SetClientData(const std::vector<u8>& input, std::vector<u8>& output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlClientData params{};
std::memcpy(&params, input.data(), input.size());
user_data = params.data;
return 0;
return NvResult::Success;
}
u32 nvhost_gpu::GetClientData(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_gpu::GetClientData(const std::vector<u8>& input, std::vector<u8>& output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlClientData params{};
std::memcpy(&params, input.data(), input.size());
params.data = user_data;
std::memcpy(output.data(), &params, output.size());
return 0;
return NvResult::Success;
}
u32 nvhost_gpu::ZCullBind(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_gpu::ZCullBind(const std::vector<u8>& input, std::vector<u8>& output) {
std::memcpy(&zcull_params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, gpu_va={:X}, mode={:X}", zcull_params.gpu_va,
zcull_params.mode);
std::memcpy(output.data(), &zcull_params, output.size());
return 0;
return NvResult::Success;
}
u32 nvhost_gpu::SetErrorNotifier(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_gpu::SetErrorNotifier(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSetErrorNotifier params{};
std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, offset={:X}, size={:X}, mem={:X}", params.offset,
params.size, params.mem);
std::memcpy(output.data(), &params, output.size());
return 0;
return NvResult::Success;
}
u32 nvhost_gpu::SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_gpu::SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output) {
std::memcpy(&channel_priority, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority);
return 0;
return NvResult::Success;
}
u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlAllocGpfifoEx2 params{};
std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV,
@@ -126,15 +157,15 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou
params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
params.unk3);
auto& gpu = system.GPU();
params.fence_out.id = assigned_syncpoints;
params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints);
assigned_syncpoints++;
channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
params.fence_out = channel_fence;
std::memcpy(output.data(), &params, output.size());
return 0;
return NvResult::Success;
}
u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlAllocObjCtx params{};
std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, class_num={:X}, flags={:X}", params.class_num,
@@ -142,102 +173,149 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
params.obj_id = 0x0;
std::memcpy(output.data(), &params, output.size());
return 0;
return NvResult::Success;
}
u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
UNIMPLEMENTED();
static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) {
return {
Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
Tegra::SubmissionMode::Increasing),
{fence.value},
Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
Tegra::SubmissionMode::Increasing),
Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id),
};
}
static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence, u32 add_increment) {
std::vector<Tegra::CommandHeader> result{
Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
Tegra::SubmissionMode::Increasing),
{}};
for (u32 count = 0; count < add_increment; ++count) {
result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
Tegra::SubmissionMode::Increasing));
result.emplace_back(
Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id));
}
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
return result;
}
static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence,
u32 add_increment) {
std::vector<Tegra::CommandHeader> result{
Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1,
Tegra::SubmissionMode::Increasing),
{}};
const std::vector<Tegra::CommandHeader> increment{
BuildIncrementCommandList(fence, add_increment)};
result.insert(result.end(), increment.begin(), increment.end());
return result;
}
NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
Tegra::CommandList&& entries) {
LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
params.num_entries, params.flags.raw);
ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
params.num_entries * sizeof(Tegra::CommandListHeader),
"Incorrect input size");
Tegra::CommandList entries(params.num_entries);
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
params.num_entries * sizeof(Tegra::CommandListHeader));
UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
auto& gpu = system.GPU();
u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
if (params.flags.increment.Value()) {
params.fence_out.value += current_syncpoint_value;
} else {
params.fence_out.value = current_syncpoint_value;
params.fence_out.id = channel_fence.id;
if (params.flags.add_wait.Value() &&
!syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
}
if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0;
params.fence_out.value = syncpoint_manager.IncreaseSyncpoint(
params.fence_out.id, params.AddIncrementValue() + increment_value);
} else {
params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
}
gpu.PushGPUEntries(std::move(entries));
if (params.flags.add_increment.Value()) {
if (params.flags.suppress_wfi) {
gpu.PushGPUEntries(Tegra::CommandList{
BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
} else {
gpu.PushGPUEntries(Tegra::CommandList{
BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
}
}
std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
return 0;
return NvResult::Success;
}
u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
const std::vector<u8>& input2, IoctlVersion version) {
NvResult nvhost_gpu::SubmitGPFIFOBase(const std::vector<u8>& input, std::vector<u8>& output,
bool kickoff) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
UNIMPLEMENTED();
return NvResult::InvalidSize;
}
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
params.num_entries, params.flags.raw);
Tegra::CommandList entries(params.num_entries);
if (version == IoctlVersion::Version2) {
std::memcpy(entries.data(), input2.data(),
params.num_entries * sizeof(Tegra::CommandListHeader));
} else {
system.Memory().ReadBlock(params.address, entries.data(),
if (kickoff) {
system.Memory().ReadBlock(params.address, entries.command_lists.data(),
params.num_entries * sizeof(Tegra::CommandListHeader));
}
UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
auto& gpu = system.GPU();
u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
if (params.flags.increment.Value()) {
params.fence_out.value += current_syncpoint_value;
} else {
params.fence_out.value = current_syncpoint_value;
std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)],
params.num_entries * sizeof(Tegra::CommandListHeader));
}
gpu.PushGPUEntries(std::move(entries));
std::memcpy(output.data(), &params, output.size());
return 0;
return SubmitGPFIFOImpl(params, output, std::move(entries));
}
u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_gpu::SubmitGPFIFOBase(const std::vector<u8>& input,
const std::vector<u8>& input_inline,
std::vector<u8>& output) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
UNIMPLEMENTED();
return NvResult::InvalidSize;
}
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
Tegra::CommandList entries(params.num_entries);
std::memcpy(entries.command_lists.data(), input_inline.data(), input_inline.size());
return SubmitGPFIFOImpl(params, output, std::move(entries));
}
NvResult nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetWaitbase params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
params.value = 0; // Seems to be hard coded at 0
std::memcpy(output.data(), &params, output.size());
return 0;
return NvResult::Success;
}
u32 nvhost_gpu::ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_gpu::ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlChannelSetTimeout params{};
std::memcpy(&params, input.data(), sizeof(IoctlChannelSetTimeout));
LOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout);
return 0;
return NvResult::Success;
}
u32 nvhost_gpu::ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_gpu::ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSetTimeslice params{};
std::memcpy(&params, input.data(), sizeof(IoctlSetTimeslice));
LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice);
channel_timeslice = params.timeslice;
return 0;
return NvResult::Success;
}
} // namespace Service::Nvidia::Devices
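The flag handling in SubmitGPFIFOImpl above is easy to misread, so here is a minimal standalone sketch of how the IoctlSubmitGpfifo flags drive the requested syncpoint increase. The SyncpointManager bookkeeping is assumed rather than reproduced, and the raw flag word and fence value are illustrative only.
#include <cstdint>
#include <cstdio>

int main() {
    // Illustrative flags word: bit 1 = add_increment, bit 8 = increment.
    const std::uint32_t raw_flags = (1u << 1) | (1u << 8);
    const std::uint32_t add_increment = (raw_flags >> 1) & 1u;
    const std::uint32_t increment = (raw_flags >> 8) & 1u;
    // Mirrors IoctlSubmitGpfifo::AddIncrementValue(): two increments when
    // add_increment is set, zero otherwise.
    const std::uint32_t add_increment_value = add_increment << 1;
    // Per SubmitGPFIFOImpl, the incoming fence value is folded into the
    // requested increase only when the increment flag is also set.
    const std::uint32_t incoming_fence_value = 5; // assumed guest-supplied value
    const std::uint32_t requested_increase =
        add_increment_value + (increment != 0 ? incoming_fence_value : 0u);
    std::printf("requested syncpoint increase: %u\n", requested_increase); // 7
    return 0;
}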


@@ -11,46 +11,28 @@
#include "common/swap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
#include "core/hle/service/nvdrv/nvdata.h"
#include "video_core/dma_pusher.h"
namespace Service::Nvidia {
class SyncpointManager;
}
namespace Service::Nvidia::Devices {
class nvmap;
constexpr u32 NVGPU_IOCTL_MAGIC('H');
constexpr u32 NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO(0x8);
constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b);
class nvhost_gpu final : public nvdevice {
public:
explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
SyncpointManager& syncpoint_manager);
~nvhost_gpu() override;
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) override;
private:
enum class IoctlCommand : u32_le {
IocSetNVMAPfdCommand = 0x40044801,
IocAllocGPFIFOCommand = 0x40084805,
IocSetClientDataCommand = 0x40084714,
IocGetClientDataCommand = 0x80084715,
IocZCullBind = 0xc010480b,
IocSetErrorNotifierCommand = 0xC018480C,
IocChannelSetPriorityCommand = 0x4004480D,
IocEnableCommand = 0x0000480E,
IocDisableCommand = 0x0000480F,
IocPreemptCommand = 0x00004810,
IocForceResetCommand = 0x00004811,
IocEventIdControlCommand = 0x40084812,
IocGetErrorNotificationCommand = 0xC0104817,
IocAllocGPFIFOExCommand = 0x40204818,
IocAllocGPFIFOEx2Command = 0xC020481A,
IocAllocObjCtxCommand = 0xC0104809,
IocChannelGetWaitbaseCommand = 0xC0080003,
IocChannelSetTimeoutCommand = 0x40044803,
IocChannelSetTimeslice = 0xC004481D,
};
enum class CtxObjects : u32_le {
Ctx2D = 0x902D,
Ctx3D = 0xB197,
@@ -61,63 +43,63 @@ private:
};
struct IoctlSetNvmapFD {
u32_le nvmap_fd;
s32_le nvmap_fd{};
};
static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
struct IoctlChannelSetTimeout {
u32_le timeout;
u32_le timeout{};
};
static_assert(sizeof(IoctlChannelSetTimeout) == 4, "IoctlChannelSetTimeout is incorrect size");
struct IoctlAllocGPFIFO {
u32_le num_entries;
u32_le flags;
u32_le num_entries{};
u32_le flags{};
};
static_assert(sizeof(IoctlAllocGPFIFO) == 8, "IoctlAllocGPFIFO is incorrect size");
struct IoctlClientData {
u64_le data;
u64_le data{};
};
static_assert(sizeof(IoctlClientData) == 8, "IoctlClientData is incorrect size");
struct IoctlZCullBind {
u64_le gpu_va;
u32_le mode; // 0=global, 1=no_ctxsw, 2=separate_buffer, 3=part_of_regular_buf
u64_le gpu_va{};
u32_le mode{}; // 0=global, 1=no_ctxsw, 2=separate_buffer, 3=part_of_regular_buf
INSERT_PADDING_WORDS(1);
};
static_assert(sizeof(IoctlZCullBind) == 16, "IoctlZCullBind is incorrect size");
struct IoctlSetErrorNotifier {
u64_le offset;
u64_le size;
u32_le mem; // nvmap object handle
u64_le offset{};
u64_le size{};
u32_le mem{}; // nvmap object handle
INSERT_PADDING_WORDS(1);
};
static_assert(sizeof(IoctlSetErrorNotifier) == 24, "IoctlSetErrorNotifier is incorrect size");
struct IoctlChannelSetPriority {
u32_le priority;
u32_le priority{};
};
static_assert(sizeof(IoctlChannelSetPriority) == 4,
"IoctlChannelSetPriority is incorrect size");
struct IoctlSetTimeslice {
u32_le timeslice;
u32_le timeslice{};
};
static_assert(sizeof(IoctlSetTimeslice) == 4, "IoctlSetTimeslice is incorrect size");
struct IoctlEventIdControl {
u32_le cmd; // 0=disable, 1=enable, 2=clear
u32_le id;
u32_le cmd{}; // 0=disable, 1=enable, 2=clear
u32_le id{};
};
static_assert(sizeof(IoctlEventIdControl) == 8, "IoctlEventIdControl is incorrect size");
struct IoctlGetErrorNotification {
u64_le timestamp;
u32_le info32;
u16_le info16;
u16_le status; // always 0xFFFF
u64_le timestamp{};
u32_le info32{};
u16_le info16{};
u16_le status{}; // always 0xFFFF
};
static_assert(sizeof(IoctlGetErrorNotification) == 16,
"IoctlGetErrorNotification is incorrect size");
@@ -125,80 +107,89 @@ private:
static_assert(sizeof(Fence) == 8, "Fence is incorrect size");
struct IoctlAllocGpfifoEx {
u32_le num_entries;
u32_le flags;
u32_le unk0;
u32_le unk1;
u32_le unk2;
u32_le unk3;
u32_le unk4;
u32_le unk5;
u32_le num_entries{};
u32_le flags{};
u32_le unk0{};
u32_le unk1{};
u32_le unk2{};
u32_le unk3{};
u32_le unk4{};
u32_le unk5{};
};
static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size");
struct IoctlAllocGpfifoEx2 {
u32_le num_entries; // in
u32_le flags; // in
u32_le unk0; // in (1 works)
Fence fence_out; // out
u32_le unk1; // in
u32_le unk2; // in
u32_le unk3; // in
u32_le num_entries{}; // in
u32_le flags{}; // in
u32_le unk0{}; // in (1 works)
Fence fence_out{}; // out
u32_le unk1{}; // in
u32_le unk2{}; // in
u32_le unk3{}; // in
};
static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size");
struct IoctlAllocObjCtx {
u32_le class_num; // 0x902D=2d, 0xB197=3d, 0xB1C0=compute, 0xA140=kepler, 0xB0B5=DMA,
// 0xB06F=channel_gpfifo
u32_le flags;
u64_le obj_id; // (ignored) used for FREE_OBJ_CTX ioctl, which is not supported
u32_le class_num{}; // 0x902D=2d, 0xB197=3d, 0xB1C0=compute, 0xA140=kepler, 0xB0B5=DMA,
// 0xB06F=channel_gpfifo
u32_le flags{};
u64_le obj_id{}; // (ignored) used for FREE_OBJ_CTX ioctl, which is not supported
};
static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size");
struct IoctlSubmitGpfifo {
u64_le address; // pointer to gpfifo entry structs
u32_le num_entries; // number of fence objects being submitted
u64_le address{}; // pointer to gpfifo entry structs
u32_le num_entries{}; // number of fence objects being submitted
union {
u32_le raw;
BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list
BitField<1, 1, u32_le> add_increment; // append an increment to the list
BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
BitField<2, 1, u32_le> new_hw_format; // mostly ignored
BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt
BitField<8, 1, u32_le> increment; // increment the returned fence
} flags;
Fence fence_out; // returned new fence object for others to wait on
Fence fence_out{}; // returned new fence object for others to wait on
u32 AddIncrementValue() const {
return flags.add_increment.Value() << 1;
}
};
static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
"IoctlSubmitGpfifo is incorrect size");
struct IoctlGetWaitbase {
u32 unknown; // seems to be ignored? Nintendo added this
u32 value;
u32 unknown{}; // seems to be ignored? Nintendo added this
u32 value{};
};
static_assert(sizeof(IoctlGetWaitbase) == 8, "IoctlGetWaitbase is incorrect size");
u32_le nvmap_fd{};
s32_le nvmap_fd{};
u64_le user_data{};
IoctlZCullBind zcull_params{};
u32_le channel_priority{};
u32_le channel_timeslice{};
u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
u32 SetClientData(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetClientData(const std::vector<u8>& input, std::vector<u8>& output);
u32 ZCullBind(const std::vector<u8>& input, std::vector<u8>& output);
u32 SetErrorNotifier(const std::vector<u8>& input, std::vector<u8>& output);
u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
const std::vector<u8>& input2, IoctlVersion version);
u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output);
u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
NvResult SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
NvResult SetClientData(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetClientData(const std::vector<u8>& input, std::vector<u8>& output);
NvResult ZCullBind(const std::vector<u8>& input, std::vector<u8>& output);
NvResult SetErrorNotifier(const std::vector<u8>& input, std::vector<u8>& output);
NvResult SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
NvResult AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
NvResult AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
Tegra::CommandList&& entries);
NvResult SubmitGPFIFOBase(const std::vector<u8>& input, std::vector<u8>& output,
bool kickoff = false);
NvResult SubmitGPFIFOBase(const std::vector<u8>& input, const std::vector<u8>& input_inline,
std::vector<u8>& output);
NvResult GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
NvResult ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output);
NvResult ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
std::shared_ptr<nvmap> nvmap_dev;
u32 assigned_syncpoints{};
SyncpointManager& syncpoint_manager;
Fence channel_fence;
};
} // namespace Service::Nvidia::Devices


@@ -15,46 +15,58 @@ nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_de
: nvhost_nvdec_common(system, std::move(nvmap_dev)) {}
nvhost_nvdec::~nvhost_nvdec() = default;
u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) {
LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
command.raw, input.size(), output.size());
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::IocSetNVMAPfdCommand:
return SetNVMAPfd(input);
case IoctlCommand::IocSubmit:
return Submit(input, output);
case IoctlCommand::IocGetSyncpoint:
return GetSyncpoint(input, output);
case IoctlCommand::IocGetWaitbase:
return GetWaitbase(input, output);
case IoctlCommand::IocMapBuffer:
case IoctlCommand::IocMapBuffer2:
case IoctlCommand::IocMapBuffer3:
case IoctlCommand::IocMapBufferEx:
return MapBuffer(input, output);
case IoctlCommand::IocUnmapBufferEx: {
// This command is sent when the video stream has ended; flush all video contexts.
// This is usually sent in the following order: vic, nvdec, vic.
// Inform the GPU to clear any remaining nvdec buffers when this is detected.
LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
Tegra::ChCommandHeaderList cmdlist(1);
cmdlist[0] = Tegra::ChCommandHeader{0xDEADB33F};
system.GPU().PushCommandBuffer(cmdlist);
[[fallthrough]]; // fallthrough to unmap buffers
};
case IoctlCommand::IocUnmapBuffer:
case IoctlCommand::IocUnmapBuffer2:
case IoctlCommand::IocUnmapBuffer3:
return UnmapBuffer(input, output);
case IoctlCommand::IocSetSubmitTimeout:
return SetSubmitTimeout(input, output);
NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) {
switch (command.group) {
case 0x0:
switch (command.cmd) {
case 0x1:
return Submit(input, output);
case 0x2:
return GetSyncpoint(input, output);
case 0x3:
return GetWaitbase(input, output);
case 0x7:
return SetSubmitTimeout(input, output);
case 0x9:
return MapBuffer(input, output);
case 0xa: {
if (command.length == 0x1c) {
LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
Tegra::ChCommandHeaderList cmdlist(1);
cmdlist[0] = Tegra::ChCommandHeader{0xDEADB33F};
system.GPU().PushCommandBuffer(cmdlist);
}
return UnmapBuffer(input, output);
}
default:
break;
}
break;
case 'H':
switch (command.cmd) {
case 0x1:
return SetNVMAPfd(input);
default:
break;
}
break;
}
UNIMPLEMENTED_MSG("Unimplemented ioctl 0x{:X}", command.raw);
return 0;
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_nvdec::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_nvdec::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
} // namespace Service::Nvidia::Devices
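The rewritten dispatcher keys on the group and cmd bitfields from nvdata.h instead of full ioctl words. As a sanity check (not part of the change itself), the removed IoctlCommand constants decode into the new cases as follows, assuming that bit layout:
// cmd = bits 0-7, group = bits 8-15, length = bits 16-29
//   IocSubmit            0xC0400001 -> group 0x00, cmd 0x1
//   IocGetSyncpoint      0xC0080002 -> group 0x00, cmd 0x2
//   IocGetWaitbase       0xC0080003 -> group 0x00, cmd 0x3
//   IocSetSubmitTimeout  0x40040007 -> group 0x00, cmd 0x7
//   IocMapBuffer/-Ex     0xC01C0009, 0xC0A40009, ... -> group 0x00, cmd 0x9
//   IocUnmapBuffer/-Ex   0xC0A4000A, 0xC01C000A, ... -> group 0x00, cmd 0xa
//   IocSetNVMAPfdCommand 0x40044801 -> group 0x48 ('H'), cmd 0x1
// The length == 0x1c check in case 0xa corresponds to the old IocUnmapBufferEx
// word 0xC01C000A (payload size 0x1C) that marked the end of a video stream.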


@@ -14,26 +14,11 @@ public:
explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
~nvhost_nvdec() override;
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) override;
private:
enum class IoctlCommand : u32_le {
IocSetNVMAPfdCommand = 0x40044801,
IocSubmit = 0xC0400001,
IocGetSyncpoint = 0xC0080002,
IocGetWaitbase = 0xC0080003,
IocMapBuffer = 0xC01C0009,
IocMapBuffer2 = 0xC16C0009,
IocMapBuffer3 = 0xC15C0009,
IocMapBufferEx = 0xC0A40009,
IocUnmapBuffer = 0xC0A4000A,
IocUnmapBuffer2 = 0xC16C000A,
IocUnmapBufferEx = 0xC01C000A,
IocUnmapBuffer3 = 0xC15C000A,
IocSetSubmitTimeout = 0x40040007,
};
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) override;
};
} // namespace Service::Nvidia::Devices


@@ -36,26 +36,20 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s
}
} // Anonymous namespace
namespace NvErrCodes {
constexpr u32 Success{};
[[maybe_unused]] constexpr u32 OutOfMemory{static_cast<u32>(-12)};
constexpr u32 InvalidInput{static_cast<u32>(-22)};
} // namespace NvErrCodes
nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
nvhost_nvdec_common::~nvhost_nvdec_common() = default;
u32 nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
IoctlSetNvmapFD params{};
std::memcpy(&params, input.data(), sizeof(IoctlSetNvmapFD));
LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
nvmap_fd = params.nvmap_fd;
return 0;
return NvResult::Success;
}
u32 nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSubmit params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
@@ -83,12 +77,12 @@ u32 nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u8>& o
for (const auto& cmd_buffer : command_buffers) {
auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
ASSERT_OR_EXECUTE(object, return NvErrCodes::InvalidInput;);
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
const auto map = FindBufferMap(object->dma_map_addr);
if (!map) {
LOG_ERROR(Service_NVDRV, "Tried to submit an invalid offset 0x{:X} dma 0x{:X}",
object->addr, object->dma_map_addr);
return 0;
return NvResult::Success;
}
Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
gpu.MemoryManager().ReadBlock(map->StartAddr() + cmd_buffer.offset, cmdlist.data(),
@@ -105,10 +99,10 @@ u32 nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u8>& o
offset = WriteVectors(output, syncpt_increments, offset);
offset = WriteVectors(output, wait_checks, offset);
return NvErrCodes::Success;
return NvResult::Success;
}
u32 nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetSyncpoint params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
@@ -118,18 +112,18 @@ u32 nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::vector<
params.value = 0;
std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
return NvErrCodes::Success;
return NvResult::Success;
}
u32 nvhost_nvdec_common::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_nvdec_common::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetWaitbase params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
params.value = 0; // Seems to be hard coded at 0
std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase));
return 0;
return NvResult::Success;
}
u32 nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
@@ -143,7 +137,7 @@ u32 nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vector<u8>
if (!object) {
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle);
std::memcpy(output.data(), &params, output.size());
return NvErrCodes::InvalidInput;
return NvResult::InvalidState;
}
if (object->dma_map_addr == 0) {
// NVDEC and VIC memory is in the 32-bit address space
@@ -165,10 +159,10 @@ u32 nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vector<u8>
std::memcpy(output.data() + sizeof(IoctlMapBuffer), cmd_buffer_handles.data(),
cmd_buffer_handles.size() * sizeof(MapBufferEntry));
return NvErrCodes::Success;
return NvResult::Success;
}
u32 nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
@@ -181,7 +175,7 @@ u32 nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u
if (!object) {
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle);
std::memcpy(output.data(), &params, output.size());
return NvErrCodes::InvalidInput;
return NvResult::InvalidState;
}
if (const auto size{RemoveBufferMap(object->dma_map_addr)}; size) {
gpu.MemoryManager().Unmap(object->dma_map_addr, *size);
@@ -193,13 +187,14 @@ u32 nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u
object->dma_map_addr = 0;
}
std::memset(output.data(), 0, output.size());
return NvErrCodes::Success;
return NvResult::Success;
}
u32 nvhost_nvdec_common::SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_nvdec_common::SetSubmitTimeout(const std::vector<u8>& input,
std::vector<u8>& output) {
std::memcpy(&submit_timeout, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
return NvErrCodes::Success;
return NvResult::Success;
}
std::optional<nvhost_nvdec_common::BufferMap> nvhost_nvdec_common::FindBufferMap(


@@ -18,9 +18,37 @@ public:
explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
~nvhost_nvdec_common() override;
virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) = 0;
/**
* Handles an ioctl1 request.
* @param command The ioctl command id.
* @param input A buffer containing the input data for the ioctl.
* @param output A buffer where the output data will be written to.
* @returns The result code of the ioctl.
*/
virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) = 0;
/**
* Handles an ioctl2 request.
* @param command The ioctl command id.
* @param input A buffer containing the input data for the ioctl.
* @param inline_input A buffer containing the input data for the ioctl which has been inlined.
* @param output A buffer where the output data will be written to.
* @returns The result code of the ioctl.
*/
virtual NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) = 0;
/**
* Handles an ioctl3 request.
* @param command The ioctl command id.
* @param input A buffer containing the input data for the ioctl.
* @param output A buffer where the output data will be written to.
* @param inline_output A buffer where the inlined output data will be written to.
* @returns The result code of the ioctl.
*/
virtual NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) = 0;
protected:
class BufferMap final {
@@ -63,102 +91,102 @@ protected:
};
struct IoctlSetNvmapFD {
u32_le nvmap_fd;
s32_le nvmap_fd{};
};
static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
struct IoctlSubmitCommandBuffer {
u32_le id;
u32_le offset;
u32_le count;
u32_le id{};
u32_le offset{};
u32_le count{};
};
static_assert(sizeof(IoctlSubmitCommandBuffer) == 0xC,
"IoctlSubmitCommandBuffer is incorrect size");
struct IoctlSubmit {
u32_le cmd_buffer_count;
u32_le relocation_count;
u32_le syncpoint_count;
u32_le fence_count;
u32_le cmd_buffer_count{};
u32_le relocation_count{};
u32_le syncpoint_count{};
u32_le fence_count{};
};
static_assert(sizeof(IoctlSubmit) == 0x10, "IoctlSubmit has incorrect size");
struct CommandBuffer {
s32 memory_id;
u32 offset;
s32 word_count;
s32 memory_id{};
u32 offset{};
s32 word_count{};
};
static_assert(sizeof(CommandBuffer) == 0xC, "CommandBuffer has incorrect size");
struct Reloc {
s32 cmdbuffer_memory;
s32 cmdbuffer_offset;
s32 target;
s32 target_offset;
s32 cmdbuffer_memory{};
s32 cmdbuffer_offset{};
s32 target{};
s32 target_offset{};
};
static_assert(sizeof(Reloc) == 0x10, "CommandBuffer has incorrect size");
struct SyncptIncr {
u32 id;
u32 increments;
u32 id{};
u32 increments{};
};
static_assert(sizeof(SyncptIncr) == 0x8, "CommandBuffer has incorrect size");
struct Fence {
u32 id;
u32 value;
u32 id{};
u32 value{};
};
static_assert(sizeof(Fence) == 0x8, "CommandBuffer has incorrect size");
struct IoctlGetSyncpoint {
// Input
u32_le param;
u32_le param{};
// Output
u32_le value;
u32_le value{};
};
static_assert(sizeof(IoctlGetSyncpoint) == 8, "IocGetIdParams has wrong size");
struct IoctlGetWaitbase {
u32_le unknown; // seems to be ignored? Nintendo added this
u32_le value;
u32_le unknown{}; // seems to be ignored? Nintendo added this
u32_le value{};
};
static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size");
struct IoctlMapBuffer {
u32_le num_entries;
u32_le data_address; // Ignored by the driver.
u32_le attach_host_ch_das;
u32_le num_entries{};
u32_le data_address{}; // Ignored by the driver.
u32_le attach_host_ch_das{};
};
static_assert(sizeof(IoctlMapBuffer) == 0x0C, "IoctlMapBuffer is incorrect size");
struct IocGetIdParams {
// Input
u32_le param;
u32_le param{};
// Output
u32_le value;
u32_le value{};
};
static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");
// Used for mapping and unmapping command buffers
struct MapBufferEntry {
u32_le map_handle;
u32_le map_address;
u32_le map_handle{};
u32_le map_address{};
};
static_assert(sizeof(IoctlMapBuffer) == 0x0C, "IoctlMapBuffer is incorrect size");
/// Ioctl command implementations
u32 SetNVMAPfd(const std::vector<u8>& input);
u32 Submit(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
u32 UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
u32 SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output);
NvResult SetNVMAPfd(const std::vector<u8>& input);
NvResult Submit(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
NvResult MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
NvResult UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
NvResult SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output);
std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const;
void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
u32_le nvmap_fd{};
s32_le nvmap_fd{};
u32_le submit_timeout{};
std::shared_ptr<nvmap> nvmap_dev;


@@ -13,28 +13,44 @@ namespace Service::Nvidia::Devices {
nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {}
nvhost_nvjpg::~nvhost_nvjpg() = default;
u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) {
LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
command.raw, input.size(), output.size());
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::IocSetNVMAPfdCommand:
return SetNVMAPfd(input, output);
NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) {
switch (command.group) {
case 'H':
switch (command.cmd) {
case 0x1:
return SetNVMAPfd(input, output);
default:
break;
}
break;
default:
break;
}
UNIMPLEMENTED_MSG("Unimplemented ioctl");
return 0;
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
u32 nvhost_nvjpg::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvhost_nvjpg::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_nvjpg::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_nvjpg::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSetNvmapFD params{};
std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
nvmap_fd = params.nvmap_fd;
return 0;
return NvResult::Success;
}
} // namespace Service::Nvidia::Devices


@@ -16,23 +16,21 @@ public:
explicit nvhost_nvjpg(Core::System& system);
~nvhost_nvjpg() override;
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) override;
private:
enum class IoctlCommand : u32_le {
IocSetNVMAPfdCommand = 0x40044801,
};
struct IoctlSetNvmapFD {
u32_le nvmap_fd;
s32_le nvmap_fd{};
};
static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
u32_le nvmap_fd{};
s32_le nvmap_fd{};
u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
NvResult SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
};
} // namespace Service::Nvidia::Devices


@@ -15,36 +15,50 @@ nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
nvhost_vic::~nvhost_vic() = default;
u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) {
LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
command.raw, input.size(), output.size());
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::IocSetNVMAPfdCommand:
return SetNVMAPfd(input);
case IoctlCommand::IocSubmit:
return Submit(input, output);
case IoctlCommand::IocGetSyncpoint:
return GetSyncpoint(input, output);
case IoctlCommand::IocGetWaitbase:
return GetWaitbase(input, output);
case IoctlCommand::IocMapBuffer:
case IoctlCommand::IocMapBuffer2:
case IoctlCommand::IocMapBuffer3:
case IoctlCommand::IocMapBuffer4:
case IoctlCommand::IocMapBufferEx:
return MapBuffer(input, output);
case IoctlCommand::IocUnmapBuffer:
case IoctlCommand::IocUnmapBuffer2:
case IoctlCommand::IocUnmapBuffer3:
case IoctlCommand::IocUnmapBufferEx:
return UnmapBuffer(input, output);
NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
switch (command.group) {
case 0x0:
switch (command.cmd) {
case 0x1:
return Submit(input, output);
case 0x2:
return GetSyncpoint(input, output);
case 0x3:
return GetWaitbase(input, output);
case 0x9:
return MapBuffer(input, output);
case 0xa:
return UnmapBuffer(input, output);
default:
break;
}
break;
case 'H':
switch (command.cmd) {
case 0x1:
return SetNVMAPfd(input);
default:
break;
}
break;
default:
break;
}
UNIMPLEMENTED_MSG("Unimplemented ioctl 0x{:X}", command.raw);
return 0;
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_vic::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_vic::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
} // namespace Service::Nvidia::Devices


@@ -13,25 +13,11 @@ class nvhost_vic final : public nvhost_nvdec_common {
public:
explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
~nvhost_vic();
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) override;
private:
enum class IoctlCommand : u32_le {
IocSetNVMAPfdCommand = 0x40044801,
IocSubmit = 0xC0400001,
IocGetSyncpoint = 0xC0080002,
IocGetWaitbase = 0xC0080003,
IocMapBuffer = 0xC01C0009,
IocMapBuffer2 = 0xC0340009,
IocMapBuffer3 = 0xC0140009,
IocMapBuffer4 = 0xC00C0009,
IocMapBufferEx = 0xC03C0009,
IocUnmapBuffer = 0xC03C000A,
IocUnmapBuffer2 = 0xC034000A,
IocUnmapBuffer3 = 0xC00C000A,
IocUnmapBufferEx = 0xC01C000A,
};
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) override;
};
} // namespace Service::Nvidia::Devices


@@ -11,13 +11,6 @@
namespace Service::Nvidia::Devices {
namespace NvErrCodes {
enum {
OperationNotPermitted = -1,
InvalidValue = -22,
};
}
nvmap::nvmap(Core::System& system) : nvdevice(system) {
// Handle 0 appears to be used when remapping, so we create a placeholder empty nvmap object to
// represent this.
@@ -26,6 +19,46 @@ nvmap::nvmap(Core::System& system) : nvdevice(system) {
nvmap::~nvmap() = default;
NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
switch (command.group) {
case 0x1:
switch (command.cmd) {
case 0x1:
return IocCreate(input, output);
case 0x3:
return IocFromId(input, output);
case 0x4:
return IocAlloc(input, output);
case 0x5:
return IocFree(input, output);
case 0x9:
return IocParam(input, output);
case 0xe:
return IocGetId(input, output);
default:
break;
}
break;
default:
break;
}
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvmap::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvmap::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
VAddr nvmap::GetObjectAddress(u32 handle) const {
auto object = GetObject(handle);
ASSERT(object);
@@ -33,28 +66,6 @@ VAddr nvmap::GetObjectAddress(u32 handle) const {
return object->addr;
}
u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) {
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::Create:
return IocCreate(input, output);
case IoctlCommand::Alloc:
return IocAlloc(input, output);
case IoctlCommand::GetId:
return IocGetId(input, output);
case IoctlCommand::FromId:
return IocFromId(input, output);
case IoctlCommand::Param:
return IocParam(input, output);
case IoctlCommand::Free:
return IocFree(input, output);
}
UNIMPLEMENTED_MSG("Unimplemented ioctl");
return 0;
}
u32 nvmap::CreateObject(u32 size) {
// Create a new nvmap object and obtain a handle to it.
auto object = std::make_shared<Object>();
@@ -70,35 +81,35 @@ u32 nvmap::CreateObject(u32 size) {
return handle;
}
u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
IocCreateParams params;
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "size=0x{:08X}", params.size);
if (!params.size) {
LOG_ERROR(Service_NVDRV, "Size is 0");
return static_cast<u32>(NvErrCodes::InvalidValue);
return NvResult::BadValue;
}
params.handle = CreateObject(params.size);
std::memcpy(output.data(), &params, sizeof(params));
return 0;
return NvResult::Success;
}
u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
IocAllocParams params;
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.addr);
if (!params.handle) {
LOG_ERROR(Service_NVDRV, "Handle is 0");
return static_cast<u32>(NvErrCodes::InvalidValue);
return NvResult::BadValue;
}
if ((params.align - 1) & params.align) {
LOG_ERROR(Service_NVDRV, "Incorrect alignment used, alignment={:08X}", params.align);
return static_cast<u32>(NvErrCodes::InvalidValue);
return NvResult::BadValue;
}
const u32 min_alignment = 0x1000;
@@ -109,12 +120,12 @@ u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
auto object = GetObject(params.handle);
if (!object) {
LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
return static_cast<u32>(NvErrCodes::InvalidValue);
return NvResult::BadValue;
}
if (object->status == Object::Status::Allocated) {
LOG_ERROR(Service_NVDRV, "Object is already allocated, handle={:08X}", params.handle);
return static_cast<u32>(NvErrCodes::OperationNotPermitted);
return NvResult::InsufficientMemory;
}
object->flags = params.flags;
@@ -124,10 +135,10 @@ u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
object->status = Object::Status::Allocated;
std::memcpy(output.data(), &params, sizeof(params));
return 0;
return NvResult::Success;
}
u32 nvmap::IocGetId(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvmap::IocGetId(const std::vector<u8>& input, std::vector<u8>& output) {
IocGetIdParams params;
std::memcpy(&params, input.data(), sizeof(params));
@@ -135,22 +146,22 @@ u32 nvmap::IocGetId(const std::vector<u8>& input, std::vector<u8>& output) {
if (!params.handle) {
LOG_ERROR(Service_NVDRV, "Handle is zero");
return static_cast<u32>(NvErrCodes::InvalidValue);
return NvResult::BadValue;
}
auto object = GetObject(params.handle);
if (!object) {
LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
return static_cast<u32>(NvErrCodes::OperationNotPermitted);
return NvResult::BadValue;
}
params.id = object->id;
std::memcpy(output.data(), &params, sizeof(params));
return 0;
return NvResult::Success;
}
u32 nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output) {
IocFromIdParams params;
std::memcpy(&params, input.data(), sizeof(params));
@@ -160,13 +171,13 @@ u32 nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output) {
[&](const auto& entry) { return entry.second->id == params.id; });
if (itr == handles.end()) {
LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
return static_cast<u32>(NvErrCodes::InvalidValue);
return NvResult::BadValue;
}
auto& object = itr->second;
if (object->status != Object::Status::Allocated) {
LOG_ERROR(Service_NVDRV, "Object is not allocated, handle={:08X}", params.handle);
return static_cast<u32>(NvErrCodes::InvalidValue);
return NvResult::BadValue;
}
itr->second->refcount++;
@@ -175,10 +186,10 @@ u32 nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output) {
params.handle = itr->first;
std::memcpy(output.data(), &params, sizeof(params));
return 0;
return NvResult::Success;
}
u32 nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output) {
enum class ParamTypes { Size = 1, Alignment = 2, Base = 3, Heap = 4, Kind = 5, Compr = 6 };
IocParamParams params;
@@ -189,12 +200,12 @@ u32 nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output) {
auto object = GetObject(params.handle);
if (!object) {
LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
return static_cast<u32>(NvErrCodes::InvalidValue);
return NvResult::BadValue;
}
if (object->status != Object::Status::Allocated) {
LOG_ERROR(Service_NVDRV, "Object is not allocated, handle={:08X}", params.handle);
return static_cast<u32>(NvErrCodes::OperationNotPermitted);
return NvResult::BadValue;
}
switch (static_cast<ParamTypes>(params.param)) {
@@ -216,10 +227,10 @@ u32 nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output) {
}
std::memcpy(output.data(), &params, sizeof(params));
return 0;
return NvResult::Success;
}
u32 nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
NvResult nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
// TODO(Subv): These flags are unconfirmed.
enum FreeFlags {
Freed = 0,
@@ -234,14 +245,14 @@ u32 nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
auto itr = handles.find(params.handle);
if (itr == handles.end()) {
LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
return static_cast<u32>(NvErrCodes::InvalidValue);
return NvResult::BadValue;
}
if (!itr->second->refcount) {
LOG_ERROR(
Service_NVDRV,
"There is no references to this object. The object is already freed. handle={:08X}",
params.handle);
return static_cast<u32>(NvErrCodes::InvalidValue);
return NvResult::BadValue;
}
itr->second->refcount--;
@@ -261,7 +272,7 @@ u32 nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
handles.erase(params.handle);
std::memcpy(output.data(), &params, sizeof(params));
return 0;
return NvResult::Success;
}
} // namespace Service::Nvidia::Devices
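The same decode applies to nvmap, whose dispatcher now keys on group 0x1. A quick, illustrative cross-check of the removed IoctlCommand words (visible in the header diff that follows) against the new cases, with the length field lining up with the parameter struct sizes asserted there:
// Assuming the Ioctl bit layout from nvdata.h:
//   Create 0xC0080101 -> group 0x1, cmd 0x1, length 0x08 (IocCreateParams)
//   FromId 0xC0080103 -> group 0x1, cmd 0x3, length 0x08 (IocFromIdParams)
//   Alloc  0xC0200104 -> group 0x1, cmd 0x4, length 0x20 (IocAllocParams)
//   Free   0xC0180105 -> group 0x1, cmd 0x5, length 0x18 (IocFreeParams)
//   Param  0xC00C0109 -> group 0x1, cmd 0x9, length 0x0C (IocParamParams)
//   GetId  0xC008010E -> group 0x1, cmd 0xe, length 0x08 (IocGetIdParams)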


@@ -19,13 +19,15 @@ public:
explicit nvmap(Core::System& system);
~nvmap() override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output) override;
/// Returns the allocated address of an nvmap object given its handle.
VAddr GetObjectAddress(u32 handle) const;
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) override;
/// Represents an nvmap object.
struct Object {
enum class Status { Created, Allocated };
@@ -58,76 +60,68 @@ private:
/// Mapping of currently allocated handles to the objects they represent.
std::unordered_map<u32, std::shared_ptr<Object>> handles;
enum class IoctlCommand : u32 {
Create = 0xC0080101,
FromId = 0xC0080103,
Alloc = 0xC0200104,
Free = 0xC0180105,
Param = 0xC00C0109,
GetId = 0xC008010E,
};
struct IocCreateParams {
// Input
u32_le size;
u32_le size{};
// Output
u32_le handle;
u32_le handle{};
};
static_assert(sizeof(IocCreateParams) == 8, "IocCreateParams has wrong size");
struct IocFromIdParams {
// Input
u32_le id;
u32_le id{};
// Output
u32_le handle;
u32_le handle{};
};
static_assert(sizeof(IocFromIdParams) == 8, "IocFromIdParams has wrong size");
struct IocAllocParams {
// Input
u32_le handle;
u32_le heap_mask;
u32_le flags;
u32_le align;
u8 kind;
u32_le handle{};
u32_le heap_mask{};
u32_le flags{};
u32_le align{};
u8 kind{};
INSERT_PADDING_BYTES(7);
u64_le addr;
u64_le addr{};
};
static_assert(sizeof(IocAllocParams) == 32, "IocAllocParams has wrong size");
struct IocFreeParams {
u32_le handle;
u32_le handle{};
INSERT_PADDING_BYTES(4);
u64_le address;
u32_le size;
u32_le flags;
u64_le address{};
u32_le size{};
u32_le flags{};
};
static_assert(sizeof(IocFreeParams) == 24, "IocFreeParams has wrong size");
struct IocParamParams {
// Input
u32_le handle;
u32_le param;
u32_le handle{};
u32_le param{};
// Output
u32_le result;
u32_le result{};
};
static_assert(sizeof(IocParamParams) == 12, "IocParamParams has wrong size");
struct IocGetIdParams {
// Output
u32_le id;
u32_le id{};
// Input
u32_le handle;
u32_le handle{};
};
static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");
u32 CreateObject(u32 size);
u32 IocCreate(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocAlloc(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocGetId(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocFromId(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocParam(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocFree(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocCreate(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocAlloc(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocGetId(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocFromId(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocParam(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocFree(const std::vector<u8>& input, std::vector<u8>& output);
};
} // namespace Service::Nvidia::Devices


@@ -23,124 +23,170 @@ void NVDRV::SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
void NVDRV::Open(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_NVDRV, "called");
const auto& buffer = ctx.ReadBuffer();
std::string device_name(buffer.begin(), buffer.end());
if (!is_initialized) {
ServiceError(ctx, NvResult::NotInitialized);
LOG_ERROR(Service_NVDRV, "NvServices is not initialized!");
return;
}
const auto& buffer = ctx.ReadBuffer();
const std::string device_name(buffer.begin(), buffer.end());
DeviceFD fd = nvdrv->Open(device_name);
u32 fd = nvdrv->Open(device_name);
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push<u32>(fd);
rb.Push<u32>(0);
rb.Push<DeviceFD>(fd);
rb.PushEnum(fd != INVALID_NVDRV_FD ? NvResult::Success : NvResult::FileOperationFailed);
}
void NVDRV::IoctlBase(Kernel::HLERequestContext& ctx, IoctlVersion version) {
IPC::RequestParser rp{ctx};
u32 fd = rp.Pop<u32>();
u32 command = rp.Pop<u32>();
/// Ioctl 3 has 2 outputs, first in the input params, second is the result
std::vector<u8> output(ctx.GetWriteBufferSize(0));
std::vector<u8> output2;
if (version == IoctlVersion::Version3) {
output2.resize((ctx.GetWriteBufferSize(1)));
}
/// Ioctl2 has 2 inputs. It's used to pass data directly instead of providing a pointer.
/// KickoffPB uses this
auto input = ctx.ReadBuffer(0);
std::vector<u8> input2;
if (version == IoctlVersion::Version2) {
input2 = ctx.ReadBuffer(1);
}
IoctlCtrl ctrl{};
u32 result = nvdrv->Ioctl(fd, command, input, input2, output, output2, ctrl, version);
if (ctrl.must_delay) {
ctrl.fresh_call = false;
ctx.SleepClientThread(
"NVServices::DelayedResponse", ctrl.timeout,
[=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_,
Kernel::ThreadWakeupReason reason) {
IoctlCtrl ctrl2{ctrl};
std::vector<u8> tmp_output = output;
std::vector<u8> tmp_output2 = output2;
const u32 ioctl_result = nvdrv->Ioctl(fd, command, input, input2, tmp_output,
tmp_output2, ctrl2, version);
ctx_.WriteBuffer(tmp_output, 0);
if (version == IoctlVersion::Version3) {
ctx_.WriteBuffer(tmp_output2, 1);
}
IPC::ResponseBuilder rb{ctx_, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(ioctl_result);
},
nvdrv->GetEventWriteable(ctrl.event_id));
} else {
ctx.WriteBuffer(output);
if (version == IoctlVersion::Version3) {
ctx.WriteBuffer(output2, 1);
}
}
void NVDRV::ServiceError(Kernel::HLERequestContext& ctx, NvResult result) {
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(result);
rb.PushEnum(result);
}
void NVDRV::Ioctl(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlBase(ctx, IoctlVersion::Version1);
void NVDRV::Ioctl1(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto fd = rp.Pop<DeviceFD>();
const auto command = rp.PopRaw<Ioctl>();
LOG_DEBUG(Service_NVDRV, "called fd={}, ioctl=0x{:08X}", fd, command.raw);
if (!is_initialized) {
ServiceError(ctx, NvResult::NotInitialized);
LOG_ERROR(Service_NVDRV, "NvServices is not initialized!");
return;
}
// Check device
std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
const auto input_buffer = ctx.ReadBuffer(0);
const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer);
if (command.is_out != 0) {
ctx.WriteBuffer(output_buffer);
}
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(nv_result);
}
void NVDRV::Ioctl2(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlBase(ctx, IoctlVersion::Version2);
IPC::RequestParser rp{ctx};
const auto fd = rp.Pop<DeviceFD>();
const auto command = rp.PopRaw<Ioctl>();
LOG_DEBUG(Service_NVDRV, "called fd={}, ioctl=0x{:08X}", fd, command.raw);
if (!is_initialized) {
ServiceError(ctx, NvResult::NotInitialized);
LOG_ERROR(Service_NVDRV, "NvServices is not initialized!");
return;
}
const auto input_buffer = ctx.ReadBuffer(0);
const auto input_inlined_buffer = ctx.ReadBuffer(1);
std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
const auto nv_result =
nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer);
if (command.is_out != 0) {
ctx.WriteBuffer(output_buffer);
}
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(nv_result);
}
void NVDRV::Ioctl3(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlBase(ctx, IoctlVersion::Version3);
IPC::RequestParser rp{ctx};
const auto fd = rp.Pop<DeviceFD>();
const auto command = rp.PopRaw<Ioctl>();
LOG_DEBUG(Service_NVDRV, "called fd={}, ioctl=0x{:08X}", fd, command.raw);
if (!is_initialized) {
ServiceError(ctx, NvResult::NotInitialized);
LOG_ERROR(Service_NVDRV, "NvServices is not initialized!");
return;
}
const auto input_buffer = ctx.ReadBuffer(0);
std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1));
const auto nv_result =
nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline);
if (command.is_out != 0) {
ctx.WriteBuffer(output_buffer, 0);
ctx.WriteBuffer(output_buffer_inline, 1);
}
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(nv_result);
}
void NVDRV::Close(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_NVDRV, "called");
if (!is_initialized) {
ServiceError(ctx, NvResult::NotInitialized);
LOG_ERROR(Service_NVDRV, "NvServices is not initialized!");
return;
}
IPC::RequestParser rp{ctx};
u32 fd = rp.Pop<u32>();
const auto fd = rp.Pop<DeviceFD>();
const auto result = nvdrv->Close(fd);
auto result = nvdrv->Close(fd);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(result);
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(result);
}
void NVDRV::Initialize(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
is_initialized = true;
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u32>(0);
rb.PushEnum(NvResult::Success);
}
void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
u32 fd = rp.Pop<u32>();
// TODO(Blinkhawk): Figure out the meaning of the flag at bit 16
u32 event_id = rp.Pop<u32>() & 0x000000FF;
const auto fd = rp.Pop<DeviceFD>();
const auto event_id = rp.Pop<u32>() & 0x00FF;
LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id);
IPC::ResponseBuilder rb{ctx, 3, 1};
rb.Push(RESULT_SUCCESS);
if (!is_initialized) {
ServiceError(ctx, NvResult::NotInitialized);
LOG_ERROR(Service_NVDRV, "NvServices is not initialized!");
return;
}
const auto nv_result = nvdrv->VerifyFD(fd);
if (nv_result != NvResult::Success) {
LOG_ERROR(Service_NVDRV, "Invalid FD specified DeviceFD={}!", fd);
ServiceError(ctx, nv_result);
return;
}
if (event_id < MaxNvEvents) {
IPC::ResponseBuilder rb{ctx, 3, 1};
rb.Push(RESULT_SUCCESS);
auto event = nvdrv->GetEvent(event_id);
event->Clear();
rb.PushCopyObjects(event);
rb.Push<u32>(NvResult::Success);
rb.PushEnum(NvResult::Success);
} else {
rb.Push<u32>(0);
rb.Push<u32>(NvResult::BadParameter);
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(NvResult::BadParameter);
}
}
@@ -151,7 +197,7 @@ void NVDRV::SetAruid(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u32>(0);
rb.PushEnum(NvResult::Success);
}
void NVDRV::SetGraphicsFirmwareMemoryMarginEnabled(Kernel::HLERequestContext& ctx) {
@@ -164,8 +210,9 @@ void NVDRV::SetGraphicsFirmwareMemoryMarginEnabled(Kernel::HLERequestContext& ct
void NVDRV::GetStatus(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(NvResult::Success);
}
void NVDRV::DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx) {
@@ -181,7 +228,7 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
: ServiceFramework(name), nvdrv(std::move(nvdrv)) {
static const FunctionInfo functions[] = {
{0, &NVDRV::Open, "Open"},
{1, &NVDRV::Ioctl, "Ioctl"},
{1, &NVDRV::Ioctl1, "Ioctl"},
{2, &NVDRV::Close, "Close"},
{3, &NVDRV::Initialize, "Initialize"},
{4, &NVDRV::QueryEvent, "QueryEvent"},


@@ -23,7 +23,7 @@ public:
private:
void Open(Kernel::HLERequestContext& ctx);
void Ioctl(Kernel::HLERequestContext& ctx);
void Ioctl1(Kernel::HLERequestContext& ctx);
void Ioctl2(Kernel::HLERequestContext& ctx);
void Ioctl3(Kernel::HLERequestContext& ctx);
void Close(Kernel::HLERequestContext& ctx);
@@ -33,11 +33,13 @@ private:
void SetGraphicsFirmwareMemoryMarginEnabled(Kernel::HLERequestContext& ctx);
void GetStatus(Kernel::HLERequestContext& ctx);
void DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx);
void IoctlBase(Kernel::HLERequestContext& ctx, IoctlVersion version);
void ServiceError(Kernel::HLERequestContext& ctx, NvResult result);
std::shared_ptr<Module> nvdrv;
u64 pid{};
bool is_initialized{};
};
} // namespace Service::Nvidia


@@ -1,12 +1,16 @@
#pragma once
#include <array>
#include "common/bit_field.h"
#include "common/common_types.h"
namespace Service::Nvidia {
constexpr u32 MaxSyncPoints = 192;
constexpr u32 MaxNvEvents = 64;
using DeviceFD = s32;
constexpr DeviceFD INVALID_NVDRV_FD = -1;
struct Fence {
s32 id;
@@ -20,11 +24,61 @@ struct MultiFence {
std::array<Fence, 4> fences;
};
enum NvResult : u32 {
Success = 0,
BadParameter = 4,
Timeout = 5,
ResourceError = 15,
enum class NvResult : u32 {
Success = 0x0,
NotImplemented = 0x1,
NotSupported = 0x2,
NotInitialized = 0x3,
BadParameter = 0x4,
Timeout = 0x5,
InsufficientMemory = 0x6,
ReadOnlyAttribute = 0x7,
InvalidState = 0x8,
InvalidAddress = 0x9,
InvalidSize = 0xA,
BadValue = 0xB,
AlreadyAllocated = 0xD,
Busy = 0xE,
ResourceError = 0xF,
CountMismatch = 0x10,
OverFlow = 0x11,
InsufficientTransferMemory = 0x1000,
InsufficientVideoMemory = 0x10000,
BadSurfaceColorScheme = 0x10001,
InvalidSurface = 0x10002,
SurfaceNotSupported = 0x10003,
DispInitFailed = 0x20000,
DispAlreadyAttached = 0x20001,
DispTooManyDisplays = 0x20002,
DispNoDisplaysAttached = 0x20003,
DispModeNotSupported = 0x20004,
DispNotFound = 0x20005,
DispAttachDissallowed = 0x20006,
DispTypeNotSupported = 0x20007,
DispAuthenticationFailed = 0x20008,
DispNotAttached = 0x20009,
DispSamePwrState = 0x2000A,
DispEdidFailure = 0x2000B,
DispDsiReadAckError = 0x2000C,
DispDsiReadInvalidResp = 0x2000D,
FileWriteFailed = 0x30000,
FileReadFailed = 0x30001,
EndOfFile = 0x30002,
FileOperationFailed = 0x30003,
DirOperationFailed = 0x30004,
EndOfDirList = 0x30005,
ConfigVarNotFound = 0x30006,
InvalidConfigVar = 0x30007,
LibraryNotFound = 0x30008,
SymbolNotFound = 0x30009,
MemoryMapFailed = 0x3000A,
IoctlFailed = 0x3000F,
AccessDenied = 0x30010,
DeviceNotFound = 0x30011,
KernelDriverNotFound = 0x30012,
FileNotFound = 0x30013,
PathAlreadyExists = 0x30014,
ModuleNotPresent = 0xA000E,
};
enum class EventState {
@@ -34,21 +88,13 @@ enum class EventState {
Busy = 3,
};
enum class IoctlVersion : u32 {
Version1,
Version2,
Version3,
};
struct IoctlCtrl {
// First call done to the servioce for services that call itself again after a call.
bool fresh_call{true};
// Tells the Ioctl Wrapper that it must delay the IPC response and send the thread to sleep
bool must_delay{};
// Timeout for the delay
s64 timeout{};
// NV Event Id
s32 event_id{-1};
union Ioctl {
u32_le raw;
BitField<0, 8, u32> cmd;
BitField<8, 8, u32> group;
BitField<16, 14, u32> length;
BitField<30, 1, u32> is_in;
BitField<31, 1, u32> is_out;
};
} // namespace Service::Nvidia
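The new `Ioctl` union replaces the old `IoctlCtrl` bookkeeping: the raw ioctl word itself now carries the command number, device group, payload length, and direction flags. A minimal standalone sketch of that layout (plain shifts and masks standing in for `Common::BitField`, with the field ranges taken from the declaration above):

```cpp
#include <cstdint>
#include <cstdio>

// Simplified mirror of Service::Nvidia::Ioctl for illustration only; the real
// type expresses the same bit ranges with Common::BitField members.
struct IoctlWord {
    std::uint32_t raw;

    std::uint32_t cmd() const { return raw & 0xFF; }              // bits 0-7
    std::uint32_t group() const { return (raw >> 8) & 0xFF; }     // bits 8-15
    std::uint32_t length() const { return (raw >> 16) & 0x3FFF; } // bits 16-29
    bool is_in() const { return ((raw >> 30) & 1) != 0; }         // bit 30
    bool is_out() const { return ((raw >> 31) & 1) != 0; }        // bit 31
};

int main() {
    // Example word: decodes to command 0x04 in group 0x01 with a 0x20-byte
    // payload and both direction bits set.
    const IoctlWord ioctl{0xC0200104u};
    std::printf("cmd=%02X group=%02X length=%X in=%d out=%d\n", ioctl.cmd(),
                ioctl.group(), ioctl.length(), ioctl.is_in(), ioctl.is_out());
}
```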


@@ -5,6 +5,7 @@
#include <utility>
#include <fmt/format.h>
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/writable_event.h"
@@ -21,6 +22,7 @@
#include "core/hle/service/nvdrv/interface.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "core/hle/service/nvdrv/nvmemp.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/hle/service/nvflinger/nvflinger.h"
namespace Service::Nvidia {
@@ -36,21 +38,23 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
nvflinger.SetNVDrvInstance(module_);
}
Module::Module(Core::System& system) {
Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
auto& kernel = system.Kernel();
for (u32 i = 0; i < MaxNvEvents; i++) {
std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(kernel, event_label);
events_interface.events[i] = {Kernel::WritableEvent::CreateEventPair(kernel, event_label)};
events_interface.status[i] = EventState::Free;
events_interface.registered[i] = false;
}
auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev);
devices["/dev/nvhost-gpu"] =
std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager);
devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
devices["/dev/nvmap"] = nvmap_dev;
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface);
devices["/dev/nvhost-ctrl"] =
std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev);
devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev);
@@ -58,36 +62,101 @@ Module::Module(Core::System& system) {
Module::~Module() = default;
u32 Module::Open(const std::string& device_name) {
ASSERT_MSG(devices.find(device_name) != devices.end(), "Trying to open unknown device {}",
device_name);
NvResult Module::VerifyFD(DeviceFD fd) const {
if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState;
}
if (open_files.find(fd) == open_files.end()) {
LOG_ERROR(Service_NVDRV, "Could not find DeviceFD={}!", fd);
return NvResult::NotImplemented;
}
return NvResult::Success;
}
DeviceFD Module::Open(const std::string& device_name) {
if (devices.find(device_name) == devices.end()) {
LOG_ERROR(Service_NVDRV, "Trying to open unknown device {}", device_name);
return INVALID_NVDRV_FD;
}
auto device = devices[device_name];
const u32 fd = next_fd++;
const DeviceFD fd = next_fd++;
open_files[fd] = std::move(device);
return fd;
}
u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version) {
auto itr = open_files.find(fd);
ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device");
NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) {
if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState;
}
auto& device = itr->second;
return device->ioctl({command}, input, input2, output, output2, ctrl, version);
const auto itr = open_files.find(fd);
if (itr == open_files.end()) {
LOG_ERROR(Service_NVDRV, "Could not find DeviceFD={}!", fd);
return NvResult::NotImplemented;
}
return itr->second->Ioctl1(command, input, output);
}
ResultCode Module::Close(u32 fd) {
auto itr = open_files.find(fd);
ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device");
NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output) {
if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState;
}
const auto itr = open_files.find(fd);
if (itr == open_files.end()) {
LOG_ERROR(Service_NVDRV, "Could not find DeviceFD={}!", fd);
return NvResult::NotImplemented;
}
return itr->second->Ioctl2(command, input, inline_input, output);
}
NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output, std::vector<u8>& inline_output) {
if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState;
}
const auto itr = open_files.find(fd);
if (itr == open_files.end()) {
LOG_ERROR(Service_NVDRV, "Could not find DeviceFD={}!", fd);
return NvResult::NotImplemented;
}
return itr->second->Ioctl3(command, input, output, inline_output);
}
NvResult Module::Close(DeviceFD fd) {
if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState;
}
const auto itr = open_files.find(fd);
if (itr == open_files.end()) {
LOG_ERROR(Service_NVDRV, "Could not find DeviceFD={}!", fd);
return NvResult::NotImplemented;
}
open_files.erase(itr);
// TODO(flerovium): return correct result code if operation failed.
return RESULT_SUCCESS;
return NvResult::Success;
}
void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
@@ -95,17 +164,17 @@ void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
if (events_interface.assigned_syncpt[i] == syncpoint_id &&
events_interface.assigned_value[i] == value) {
events_interface.LiberateEvent(i);
events_interface.events[i].writable->Signal();
events_interface.events[i].event.writable->Signal();
}
}
}
std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const {
return events_interface.events[event_id].readable;
return events_interface.events[event_id].event.readable;
}
std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const {
return events_interface.events[event_id].writable;
return events_interface.events[event_id].event.writable;
}
} // namespace Service::Nvidia
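Taken together, the module-level changes drop the old assert-on-bad-fd behaviour: descriptors are now signed `DeviceFD` values, unknown nodes yield `INVALID_NVDRV_FD`, and the same fd validation guards `Ioctl1`/`Ioctl2`/`Ioctl3` and `Close`, which all report failures as `NvResult`. A small self-contained model of that bookkeeping (the `Device` type here is a stand-in, not the real `Devices::nvdevice` hierarchy):

```cpp
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>

using DeviceFD = std::int32_t;
constexpr DeviceFD INVALID_NVDRV_FD = -1;

enum class NvResult : std::uint32_t {
    Success = 0x0,
    NotImplemented = 0x1,
    InvalidState = 0x8,
};

struct Device {}; // stand-in for Devices::nvdevice

class FdTable {
public:
    DeviceFD Open(const std::string& name) {
        const auto it = devices.find(name);
        if (it == devices.end()) {
            return INVALID_NVDRV_FD; // unknown node: no assert, just an invalid fd
        }
        const DeviceFD fd = next_fd++;
        open_files[fd] = it->second;
        return fd;
    }

    NvResult VerifyFD(DeviceFD fd) const {
        if (fd < 0) {
            return NvResult::InvalidState; // negative fds are never valid
        }
        return open_files.count(fd) != 0 ? NvResult::Success : NvResult::NotImplemented;
    }

    NvResult Close(DeviceFD fd) {
        if (const NvResult result = VerifyFD(fd); result != NvResult::Success) {
            return result;
        }
        open_files.erase(fd);
        return NvResult::Success;
    }

    std::unordered_map<std::string, std::shared_ptr<Device>> devices;

private:
    DeviceFD next_fd = 1;
    std::unordered_map<DeviceFD, std::shared_ptr<Device>> open_files;
};

int main() {
    FdTable table;
    table.devices["/dev/nvmap"] = std::make_shared<Device>();
    const DeviceFD fd = table.Open("/dev/nvmap");            // first valid fd: 1
    const DeviceFD bad = table.Open("/dev/does-not-exist");  // INVALID_NVDRV_FD
    return (table.Close(fd) == NvResult::Success && bad == INVALID_NVDRV_FD) ? 0 : 1;
}
```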


@@ -10,6 +10,7 @@
#include "common/common_types.h"
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/hle/service/service.h"
namespace Core {
@@ -22,15 +23,23 @@ class NVFlinger;
namespace Service::Nvidia {
class SyncpointManager;
namespace Devices {
class nvdevice;
}
/// Represents an Nvidia event
struct NvEvent {
Kernel::EventPair event;
Fence fence{};
};
struct EventInterface {
// Mask representing currently busy events
u64 events_mask{};
// Each kernel event associated to an NV event
std::array<Kernel::EventPair, MaxNvEvents> events;
std::array<NvEvent, MaxNvEvents> events;
// The status of the current NVEvent
std::array<EventState, MaxNvEvents> status{};
// Tells if an NVEvent is registered or not
@@ -103,14 +112,23 @@ public:
return std::static_pointer_cast<T>(itr->second);
}
NvResult VerifyFD(DeviceFD fd) const;
/// Opens a device node and returns a file descriptor to it.
u32 Open(const std::string& device_name);
DeviceFD Open(const std::string& device_name);
/// Sends an ioctl command to the specified file descriptor.
u32 Ioctl(u32 fd, u32 command, const std::vector<u8>& input, const std::vector<u8>& input2,
std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
IoctlVersion version);
NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output);
NvResult Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output);
NvResult Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output, std::vector<u8>& inline_output);
/// Closes a device file descriptor and returns operation success.
ResultCode Close(u32 fd);
NvResult Close(DeviceFD fd);
void SignalSyncpt(const u32 syncpoint_id, const u32 value);
@@ -119,11 +137,14 @@ public:
std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const;
private:
/// Manages syncpoints on the host
SyncpointManager syncpoint_manager;
/// Id to use for the next open file descriptor.
u32 next_fd = 1;
DeviceFD next_fd = 1;
/// Mapping of file descriptors to the devices they reference.
std::unordered_map<u32, std::shared_ptr<Devices::nvdevice>> open_files;
std::unordered_map<DeviceFD, std::shared_ptr<Devices::nvdevice>> open_files;
/// Mapping of device node names to their implementation.
std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices;


@@ -0,0 +1,39 @@
// Copyright 2020 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "video_core/gpu.h"
namespace Service::Nvidia {
SyncpointManager::SyncpointManager(Tegra::GPU& gpu) : gpu{gpu} {}
SyncpointManager::~SyncpointManager() = default;
u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) {
syncpoints[syncpoint_id].min = gpu.GetSyncpointValue(syncpoint_id);
return GetSyncpointMin(syncpoint_id);
}
u32 SyncpointManager::AllocateSyncpoint() {
for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) {
if (!syncpoints[syncpoint_id].is_allocated) {
syncpoints[syncpoint_id].is_allocated = true;
return syncpoint_id;
}
}
UNREACHABLE_MSG("No more available syncpoints!");
return {};
}
u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) {
for (u32 index = 0; index < value; ++index) {
syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed);
}
return GetSyncpointMax(syncpoint_id);
}
} // namespace Service::Nvidia


@@ -0,0 +1,85 @@
// Copyright 2020 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <atomic>
#include "common/common_types.h"
#include "core/hle/service/nvdrv/nvdata.h"
namespace Tegra {
class GPU;
}
namespace Service::Nvidia {
class SyncpointManager final {
public:
explicit SyncpointManager(Tegra::GPU& gpu);
~SyncpointManager();
/**
* Returns true if the specified syncpoint is expired for the given value.
* @param syncpoint_id Syncpoint ID to check.
* @param value Value to check against the specified syncpoint.
* @returns True if the specified syncpoint is expired for the given value, otherwise False.
*/
bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const {
return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value);
}
/**
* Gets the lower bound for the specified syncpoint.
* @param syncpoint_id Syncpoint ID to get the lower bound for.
* @returns The lower bound for the specified syncpoint.
*/
u32 GetSyncpointMin(u32 syncpoint_id) const {
return syncpoints[syncpoint_id].min.load(std::memory_order_relaxed);
}
/**
* Gets the upper bound for the specified syncpoint.
* @param syncpoint_id Syncpoint ID to get the upper bound for.
* @returns The upper bound for the specified syncpoint.
*/
u32 GetSyncpointMax(u32 syncpoint_id) const {
return syncpoints[syncpoint_id].max.load(std::memory_order_relaxed);
}
/**
* Refreshes the minimum value for the specified syncpoint.
* @param syncpoint_id Syncpoint ID to be refreshed.
* @returns The new syncpoint minimum value.
*/
u32 RefreshSyncpoint(u32 syncpoint_id);
/**
* Allocates a new syncpoint.
* @returns The syncpoint ID for the newly allocated syncpoint.
*/
u32 AllocateSyncpoint();
/**
* Increases the maximum value for the specified syncpoint.
* @param syncpoint_id Syncpoint ID to be increased.
* @param value Value to increase the specified syncpoint by.
* @returns The new syncpoint maximum value.
*/
u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value);
private:
struct Syncpoint {
std::atomic<u32> min;
std::atomic<u32> max;
std::atomic<bool> is_allocated;
};
std::array<Syncpoint, MaxSyncPoints> syncpoints{};
Tegra::GPU& gpu;
};
} // namespace Service::Nvidia
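The manager tracks two monotonically increasing counters per syncpoint: `max` (how far the CPU has promised to advance it) and `min` (the last value read back from the GPU). A standalone model of that bookkeeping, using the same wrap-tolerant unsigned comparison as `IsSyncpointExpired` above:

```cpp
#include <cstdint>
#include <cstdio>

// Standalone model of a single syncpoint; the real manager keeps an array of
// these behind std::atomic counters.
struct Syncpoint {
    std::uint32_t min = 0; // last value observed from the GPU
    std::uint32_t max = 0; // highest value the CPU has scheduled

    // Same unsigned-subtraction trick as SyncpointManager::IsSyncpointExpired:
    // because the subtraction wraps modulo 2^32, the check stays correct even
    // after the 32-bit counters overflow.
    bool IsExpired(std::uint32_t value) const {
        return (max - value) >= (min - value);
    }
};

int main() {
    Syncpoint sp;
    sp.max += 2;                          // CPU queues two increments (IncreaseSyncpoint)
    std::printf("%d\n", sp.IsExpired(2)); // 0: GPU has not reached value 2 yet
    sp.min = 2;                           // RefreshSyncpoint would read this back from the GPU
    std::printf("%d\n", sp.IsExpired(2)); // 1: the fence at value 2 has now been passed
}
```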


@@ -29,6 +29,10 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
.slot = slot,
.status = Buffer::Status::Free,
.igbp_buffer = igbp_buffer,
.transform = {},
.crop_rect = {},
.swap_interval = 0,
.multi_fence = {},
});
buffer_wait_event.writable->Signal();


@@ -242,6 +242,10 @@ void NVFlinger::Compose() {
const auto& igbp_buffer = buffer->get().igbp_buffer;
if (!system.IsPoweredOn()) {
return; // We are likely shutting down
}
auto& gpu = system.GPU();
const auto& multi_fence = buffer->get().multi_fence;
guard->unlock();


@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"


@@ -4,6 +4,7 @@
#include "common/hex_util.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/process.h"
#include "core/hle/service/acc/profile_manager.h"


@@ -14,7 +14,7 @@
namespace Settings {
Values values = {};
bool configuring_global = true;
static bool configuring_global = true;
std::string GetTimeZoneString() {
static constexpr std::array timezones{
@@ -56,6 +56,7 @@ void LogSettings() {
log_setting("System_RegionIndex", values.region_index.GetValue());
log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue());
log_setting("Core_UseMultiCore", values.use_multi_core.GetValue());
log_setting("CPU_Accuracy", values.cpu_accuracy);
log_setting("Renderer_UseResolutionFactor", values.resolution_factor.GetValue());
log_setting("Renderer_UseFrameLimit", values.use_frame_limit.GetValue());
log_setting("Renderer_FrameLimit", values.frame_limit.GetValue());
@@ -81,11 +82,12 @@ void LogSettings() {
log_setting("Services_BCATBoxcatLocal", values.bcat_boxcat_local);
}
float Volume() {
if (values.audio_muted) {
return 0.0f;
}
return values.volume.GetValue();
bool IsConfiguringGlobal() {
return configuring_global;
}
void SetConfiguringGlobal(bool is_global) {
configuring_global = is_global;
}
bool IsGPULevelExtreme() {
@@ -97,6 +99,13 @@ bool IsGPULevelHigh() {
values.gpu_accuracy.GetValue() == GPUAccuracy::High;
}
float Volume() {
if (values.audio_muted) {
return 0.0f;
}
return values.volume.GetValue();
}
void RestoreGlobalState() {
// If a game is running, DO NOT restore the global settings state
if (Core::System::GetInstance().IsPoweredOn()) {


@@ -33,8 +33,6 @@ enum class CPUAccuracy {
DebugMode = 2,
};
extern bool configuring_global;
template <typename Type>
class Setting final {
public:
@@ -103,7 +101,7 @@ struct Values {
bool renderer_debug;
Setting<int> vulkan_device;
Setting<u16> resolution_factor = Setting(static_cast<u16>(1));
Setting<u16> resolution_factor{1};
Setting<int> aspect_ratio;
Setting<int> max_anisotropy;
Setting<bool> use_frame_limit;
@@ -198,13 +196,18 @@ struct Values {
// Add-Ons
std::map<u64, std::vector<std::string>> disabled_addons;
} extern values;
};
float Volume();
extern Values values;
bool IsConfiguringGlobal();
void SetConfiguringGlobal(bool is_global);
bool IsGPULevelExtreme();
bool IsGPULevelHigh();
float Volume();
std::string GetTimeZoneString();
void Apply();
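Hiding `configuring_global` behind `IsConfiguringGlobal()` / `SetConfiguringGlobal()` keeps the flag's storage private to settings.cpp; the configuration dialogs flip it around the UI they drive. A hedged sketch of that pattern (the RAII helper below is illustrative and not part of the codebase, and the include path is assumed):

```cpp
#include "core/settings.h" // assumed header path for the declarations above

// Illustrative RAII helper: switch between global and per-game configuration
// scope for the lifetime of the object, then restore the previous scope.
class ConfiguringScope {
public:
    explicit ConfiguringScope(bool is_global)
        : previous{Settings::IsConfiguringGlobal()} {
        Settings::SetConfiguringGlobal(is_global);
    }
    ~ConfiguringScope() {
        Settings::SetConfiguringGlobal(previous);
    }

private:
    bool previous;
};
```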


@@ -42,11 +42,11 @@ public:
void Decode();
/// Returns most recently decoded frame
AVFrame* GetCurrentFrame();
const AVFrame* GetCurrentFrame() const;
[[nodiscard]] AVFrame* GetCurrentFrame();
[[nodiscard]] const AVFrame* GetCurrentFrame() const;
/// Returns the value of current_codec
NvdecCommon::VideoCodec GetCurrentCodec() const;
[[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
private:
bool initialized{};


@@ -43,7 +43,8 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {}
H264::~H264() = default;
std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) {
const std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
bool is_first_frame) {
H264DecoderContext context{};
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));


@@ -51,14 +51,14 @@ public:
void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
/// Return the bitstream as a vector.
std::vector<u8>& GetByteArray();
const std::vector<u8>& GetByteArray() const;
[[nodiscard]] std::vector<u8>& GetByteArray();
[[nodiscard]] const std::vector<u8>& GetByteArray() const;
private:
void WriteBits(s32 value, s32 bit_count);
void WriteExpGolombCodedInt(s32 value);
void WriteExpGolombCodedUInt(u32 value);
s32 GetFreeBufferBits();
[[nodiscard]] s32 GetFreeBufferBits();
void Flush();
s32 buffer_size{8};
@@ -74,8 +74,8 @@ public:
~H264();
/// Compose the H264 header of the frame for FFmpeg decoding
std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
bool is_first_frame = false);
[[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
bool is_first_frame = false);
private:
struct H264ParameterSet {


@@ -854,7 +854,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
return uncomp_writer;
}
std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) {
const std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) {
std::vector<u8> bitstream;
{
Vp9FrameContainer curr_frame = GetCurrentFrame(state);


@@ -119,7 +119,7 @@ public:
/// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
/// documentation
std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
[[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
/// Returns true if the most recent frame was a hidden frame.
[[nodiscard]] bool WasFrameHidden() const {


@@ -231,9 +231,8 @@ struct PictureInfo {
u32 surface_params{};
INSERT_PADDING_WORDS(3);
Vp9PictureInfo Convert() const {
return Vp9PictureInfo{
[[nodiscard]] Vp9PictureInfo Convert() const {
return {
.is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
.intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
.last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,


@@ -26,8 +26,8 @@ public:
void ProcessMethod(Method method, const std::vector<u32>& arguments);
/// Return most recently decoded frame
AVFrame* GetFrame();
const AVFrame* GetFrame() const;
[[nodiscard]] AVFrame* GetFrame();
[[nodiscard]] const AVFrame* GetFrame() const;
private:
/// Invoke codec to decode a frame


@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/cityhash.h"
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
@@ -45,32 +46,41 @@ bool DmaPusher::Step() {
return false;
}
const CommandList& command_list{dma_pushbuffer.front()};
ASSERT_OR_EXECUTE(!command_list.empty(), {
// Somehow the command_list is empty, in order to avoid a crash
// We ignore it and assume its size is 0.
CommandList& command_list{dma_pushbuffer.front()};
ASSERT_OR_EXECUTE(
command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
// Somehow the command_list is empty, in order to avoid a crash
// We ignore it and assume its size is 0.
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
return true;
});
if (command_list.prefetch_command_list.size()) {
// Prefetched command list from nvdrv, used for things like synchronization
command_headers = std::move(command_list.prefetch_command_list);
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
return true;
});
const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
const GPUVAddr dma_get = command_list_header.addr;
} else {
const CommandListHeader command_list_header{
command_list.command_lists[dma_pushbuffer_subindex++]};
const GPUVAddr dma_get = command_list_header.addr;
if (dma_pushbuffer_subindex >= command_list.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
}
if (command_list_header.size == 0) {
return true;
}
// Push buffer non-empty, read a word
command_headers.resize(command_list_header.size);
gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
}
if (command_list_header.size == 0) {
return true;
}
// Push buffer non-empty, read a word
command_headers.resize(command_list_header.size);
gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
for (std::size_t index = 0; index < command_headers.size();) {
const CommandHeader& command_header = command_headers[index];


@@ -18,6 +18,8 @@ class System;
namespace Tegra {
class GPU;
enum class SubmissionMode : u32 {
IncreasingOld = 0,
Increasing = 1,
@@ -27,6 +29,31 @@ enum class SubmissionMode : u32 {
IncreaseOnce = 5
};
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
// So the values you see in docs might be multiplied by 4.
enum class BufferMethods : u32 {
BindObject = 0x0,
Nop = 0x2,
SemaphoreAddressHigh = 0x4,
SemaphoreAddressLow = 0x5,
SemaphoreSequence = 0x6,
SemaphoreTrigger = 0x7,
NotifyIntr = 0x8,
WrcacheFlush = 0x9,
Unk28 = 0xA,
UnkCacheFlush = 0xB,
RefCnt = 0x14,
SemaphoreAcquire = 0x1A,
SemaphoreRelease = 0x1B,
FenceValue = 0x1C,
FenceAction = 0x1D,
WaitForInterrupt = 0x1E,
Unk7c = 0x1F,
Yield = 0x20,
NonPullerMethods = 0x40,
};
struct CommandListHeader {
union {
u64 raw;
@@ -49,9 +76,23 @@ union CommandHeader {
static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
class GPU;
inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, SubmissionMode mode) {
CommandHeader result{};
result.method.Assign(static_cast<u32>(method));
result.arg_count.Assign(arg_count);
result.mode.Assign(mode);
return result;
}
using CommandList = std::vector<Tegra::CommandListHeader>;
struct CommandList final {
CommandList() = default;
explicit CommandList(std::size_t size) : command_lists(size) {}
explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list)
: prefetch_command_list{std::move(prefetch_command_list)} {}
std::vector<Tegra::CommandListHeader> command_lists;
std::vector<Tegra::CommandHeader> prefetch_command_list;
};
/**
* The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
@@ -60,7 +101,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
* See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
* details on this implementation.
*/
class DmaPusher {
class DmaPusher final {
public:
explicit DmaPusher(Core::System& system, GPU& gpu);
~DmaPusher();
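With `CommandList` now a struct, a prefetched list built on the CPU side (per the comment in `DmaPusher::Step()`, used by nvdrv for things like synchronization) is just a vector of `CommandHeader`s handed to the second constructor; `Step()` then consumes `prefetch_command_list` directly instead of reading the pushbuffer from guest memory. A sketch using only the declarations shown above, assuming the dma_pusher header path:

```cpp
#include <utility>
#include <vector>

#include "video_core/dma_pusher.h" // assumed path of the header shown above

// Build a one-entry prefetched command list: a single Nop submitted in
// IncreaseOnce mode. Real callers would append each method's argument words
// after the header they build for it.
Tegra::CommandList MakePrefetchedNop() {
    std::vector<Tegra::CommandHeader> headers;
    headers.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::Nop, 0,
                                                Tegra::SubmissionMode::IncreaseOnce));
    // The vector<CommandHeader> constructor fills prefetch_command_list, so
    // DmaPusher::Step() takes the prefetch branch for this list.
    return Tegra::CommandList{std::move(headers)};
}
```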


@@ -1893,6 +1893,7 @@ public:
ICMP_IMM,
FCMP_RR,
FCMP_RC,
FCMP_IMMR,
MUFU, // Multi-Function Operator
RRO_C, // Range Reduction Operator
RRO_R,
@@ -2205,6 +2206,7 @@ private:
INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),


@@ -194,30 +194,6 @@ void GPU::SyncGuestHost() {
void GPU::OnCommandListEnd() {
renderer->Rasterizer().ReleaseFences();
}
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
// So the values you see in docs might be multiplied by 4.
enum class BufferMethods {
BindObject = 0x0,
Nop = 0x2,
SemaphoreAddressHigh = 0x4,
SemaphoreAddressLow = 0x5,
SemaphoreSequence = 0x6,
SemaphoreTrigger = 0x7,
NotifyIntr = 0x8,
WrcacheFlush = 0x9,
Unk28 = 0xA,
UnkCacheFlush = 0xB,
RefCnt = 0x14,
SemaphoreAcquire = 0x1A,
SemaphoreRelease = 0x1B,
FenceValue = 0x1C,
FenceAction = 0x1D,
Unk78 = 0x1E,
Unk7c = 0x1F,
Yield = 0x20,
NonPullerMethods = 0x40,
};
enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
@@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
case BufferMethods::UnkCacheFlush:
case BufferMethods::WrcacheFlush:
case BufferMethods::FenceValue:
break;
case BufferMethods::FenceAction:
ProcessFenceActionMethod();
break;
case BufferMethods::WaitForInterrupt:
ProcessWaitForInterruptMethod();
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
@@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
}
}
void GPU::ProcessFenceActionMethod() {
switch (regs.fence_action.op) {
case FenceOperation::Acquire:
WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
break;
case FenceOperation::Increment:
IncrementSyncPoint(regs.fence_action.syncpoint_id);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented operation {}",
static_cast<u32>(regs.fence_action.op.Value()));
}
}
void GPU::ProcessWaitForInterruptMethod() {
// TODO(bunnei) ImplementMe
LOG_WARNING(HW_GPU, "(STUBBED) called");
}
void GPU::ProcessSemaphoreTriggerMethod() {
const auto semaphoreOperationMask = 0xF;
const auto op =


@@ -263,6 +263,24 @@ public:
return use_nvdec;
}
enum class FenceOperation : u32 {
Acquire = 0,
Increment = 1,
};
union FenceAction {
u32 raw;
BitField<0, 1, FenceOperation> op;
BitField<8, 24, u32> syncpoint_id;
static CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
FenceAction result{};
result.op.Assign(op);
result.syncpoint_id.Assign(syncpoint_id);
return {result.raw};
}
};
struct Regs {
static constexpr size_t NUM_REGS = 0x40;
@@ -291,10 +309,7 @@ public:
u32 semaphore_acquire;
u32 semaphore_release;
u32 fence_value;
union {
BitField<4, 4, u32> operation;
BitField<8, 8, u32> id;
} fence_action;
FenceAction fence_action;
INSERT_UNION_PADDING_WORDS(0xE2);
// Puller state
@@ -342,6 +357,8 @@ protected:
private:
void ProcessBindMethod(const MethodCall& method_call);
void ProcessFenceActionMethod();
void ProcessWaitForInterruptMethod();
void ProcessSemaphoreTriggerMethod();
void ProcessSemaphoreRelease();
void ProcessSemaphoreAcquire();


@@ -39,8 +39,8 @@ using Operation = const OperationNode&;
constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
char Swizzle(std::size_t component) {
ASSERT(component < 4);
return component["xyzw"];
static constexpr std::string_view SWIZZLE{"xyzw"};
return SWIZZLE.at(component);
}
constexpr bool IsGenericAttribute(Attribute::Index index) {
@@ -224,7 +224,7 @@ private:
std::string Visit(const Node& node);
std::pair<std::string, std::size_t> BuildCoords(Operation);
std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation);
std::string BuildAoffi(Operation);
std::string GlobalMemoryPointer(const GmemNode& gmem);
void Exit();
@@ -1416,12 +1416,12 @@ std::string ARBDecompiler::Visit(const Node& node) {
return {};
}
std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
UNIMPLEMENTED_IF(meta.sampler.is_indexed);
UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array &&
meta.sampler.type == Tegra::Shader::TextureType::TextureCube);
const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
const std::size_t count = operation.GetOperandsCount();
std::string temporary = AllocVectorTemporary();
std::size_t i = 0;
@@ -1429,12 +1429,21 @@ std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operati
AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
}
if (meta.sampler.is_array) {
AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array));
AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
++i;
}
if (meta.sampler.is_shadow) {
AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare));
std::string compare = Visit(meta.depth_compare);
if (is_extended) {
ASSERT(i == 4);
std::string extra_coord = AllocVectorTemporary();
AddLine("MOV.F {}.x, {};", extra_coord, compare);
return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
}
AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
++i;
}
return {std::move(temporary), i};
return {temporary, temporary, i};
}
std::string ARBDecompiler::BuildAoffi(Operation operation) {
@@ -1859,7 +1868,7 @@ std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
std::string ARBDecompiler::Texture(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
const auto [temporary, swizzle] = BuildCoords(operation);
const auto [coords, temporary, swizzle] = BuildCoords(operation);
std::string_view opcode = "TEX";
std::string extra;
@@ -1888,7 +1897,7 @@ std::string ARBDecompiler::Texture(Operation operation) {
}
}
AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id,
AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id,
TextureType(meta), BuildAoffi(operation));
AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
return fmt::format("{}.x", temporary);
@@ -1897,7 +1906,7 @@ std::string ARBDecompiler::Texture(Operation operation) {
std::string ARBDecompiler::TextureGather(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
const auto [temporary, swizzle] = BuildCoords(operation);
const auto [coords, temporary, swizzle] = BuildCoords(operation);
std::string comp;
if (!meta.sampler.is_shadow) {
@@ -1907,7 +1916,7 @@ std::string ARBDecompiler::TextureGather(Operation operation) {
AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
TextureType(meta), BuildAoffi(operation));
AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element));
return fmt::format("{}.x", temporary);
}
@@ -1945,13 +1954,13 @@ std::string ARBDecompiler::TextureQueryLod(Operation operation) {
std::string ARBDecompiler::TexelFetch(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
const auto [temporary, swizzle] = BuildCoords(operation);
const auto [coords, temporary, swizzle] = BuildCoords(operation);
if (!meta.sampler.is_buffer) {
ASSERT(swizzle < 4);
AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
}
AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta),
AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta),
BuildAoffi(operation));
AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
return fmt::format("{}.x", temporary);
@@ -1962,7 +1971,7 @@ std::string ARBDecompiler::TextureGradient(Operation operation) {
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
const std::string ddx = AllocVectorTemporary();
const std::string ddy = AllocVectorTemporary();
const std::string coord = BuildCoords(operation).first;
const std::string coord = std::get<1>(BuildCoords(operation));
const std::size_t num_components = meta.derivates.size() / 2;
for (std::size_t index = 0; index < num_components; ++index) {


@@ -2056,15 +2056,19 @@ private:
}
Expression Texture(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
std::string expr = GenerateTexture(
operation, "", {TextureOffset{}, TextureArgument{Type::Float, meta->bias}});
if (meta->sampler.is_shadow) {
expr = "vec4(" + expr + ')';
const auto meta = std::get<MetaTexture>(operation.GetMeta());
const bool separate_dc = meta.sampler.type == TextureType::TextureCube &&
meta.sampler.is_array && meta.sampler.is_shadow;
// TODO: Replace this with an array and make GenerateTexture use C++20 std::span
const std::vector<TextureIR> extras{
TextureOffset{},
TextureArgument{Type::Float, meta.bias},
};
std::string expr = GenerateTexture(operation, "", extras, separate_dc);
if (meta.sampler.is_shadow) {
expr = fmt::format("vec4({})", expr);
}
return {expr + GetSwizzle(meta->element), Type::Float};
return {expr + GetSwizzle(meta.element), Type::Float};
}
Expression TextureLod(Operation operation) {


@@ -771,13 +771,18 @@ void VKDevice::CollectTelemetryParameters() {
VkPhysicalDeviceDriverPropertiesKHR driver{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR,
.pNext = nullptr,
.driverID = {},
.driverName = {},
.driverInfo = {},
.conformanceVersion = {},
};
VkPhysicalDeviceProperties2KHR properties{
VkPhysicalDeviceProperties2KHR device_properties{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
.pNext = &driver,
.properties = {},
};
physical.GetProperties2KHR(properties);
physical.GetProperties2KHR(device_properties);
driver_id = driver.driverID;
vendor_name = driver.driverName;


@@ -159,6 +159,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.codeSize = 0,
};
std::vector<vk::ShaderModule> modules;
@@ -388,6 +389,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
.logicOp = VK_LOGIC_OP_COPY,
.attachmentCount = static_cast<u32>(num_attachments),
.pAttachments = cb_attachments.data(),
.blendConstants = {},
};
std::vector dynamic_states{


@@ -20,14 +20,15 @@ AsyncShaders::~AsyncShaders() {
}
void AsyncShaders::AllocateWorkers() {
// Max worker threads we should allow
constexpr u32 MAX_THREADS = 4;
// Deduce how many threads we can use
const u32 threads_used = std::thread::hardware_concurrency() / 4;
// Always allow at least 1 thread regardless of our settings
const auto max_worker_count = std::max(1U, threads_used);
// Don't use more than MAX_THREADS
const auto num_workers = std::min(max_worker_count, MAX_THREADS);
// Use at least one thread
u32 num_workers = 1;
// Deduce how many more threads we can use
const u32 thread_count = std::thread::hardware_concurrency();
if (thread_count >= 8) {
// Increase async workers by 1 for every 2 threads >= 8
num_workers += 1 + (thread_count - 8) / 2;
}
// If we already have workers queued, ignore
if (num_workers == worker_threads.size()) {
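The reworked heuristic always starts from one worker and only scales up on machines with eight or more hardware threads, adding a second worker at eight threads and one more for every two threads beyond that. A standalone restatement of the arithmetic with a few spot checks:

```cpp
#include <cstdint>

// Pure-function mirror of the worker-count heuristic above.
constexpr std::uint32_t NumAsyncShaderWorkers(std::uint32_t hardware_threads) {
    std::uint32_t num_workers = 1;                      // always at least one worker
    if (hardware_threads >= 8) {
        num_workers += 1 + (hardware_threads - 8) / 2;  // +1 at 8 threads, +1 per extra pair
    }
    return num_workers;
}

static_assert(NumAsyncShaderWorkers(4) == 1);
static_assert(NumAsyncShaderWorkers(8) == 2);
static_assert(NumAsyncShaderWorkers(12) == 4);
static_assert(NumAsyncShaderWorkers(16) == 6);
```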


@@ -137,7 +137,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::FCMP_RR:
case OpCode::Id::FCMP_RC: {
case OpCode::Id::FCMP_RC:
case OpCode::Id::FCMP_IMMR: {
UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
Node op_c = GetRegister(instr.gpr39);
Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));


@@ -556,7 +556,6 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
const bool is_shadow = depth_compare != nullptr;
const bool is_bindless = bindless_reg.has_value();
UNIMPLEMENTED_IF(texture_type == TextureType::TextureCube && is_array && is_shadow);
ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
"Illegal texture type");


@@ -240,6 +240,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
.is_tiled = is_tiled,
.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
.is_layered = false,
.block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
.block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
.block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,


@@ -264,11 +264,9 @@ endif()
if (MSVC)
include(CopyYuzuQt5Deps)
include(CopyYuzuSDLDeps)
include(CopyYuzuUnicornDeps)
include(CopyYuzuFFmpegDeps)
copy_yuzu_Qt5_deps(yuzu)
copy_yuzu_SDL_deps(yuzu)
copy_yuzu_unicorn_deps(yuzu)
copy_yuzu_FFmpeg_deps(yuzu)
endif()


@@ -25,8 +25,8 @@ ConfigureAudio::ConfigureAudio(QWidget* parent)
connect(ui->output_sink_combo_box, qOverload<int>(&QComboBox::currentIndexChanged), this,
&ConfigureAudio::UpdateAudioDevices);
ui->volume_label->setVisible(Settings::configuring_global);
ui->volume_combo_box->setVisible(!Settings::configuring_global);
ui->volume_label->setVisible(Settings::IsConfiguringGlobal());
ui->volume_combo_box->setVisible(!Settings::IsConfiguringGlobal());
SetupPerGameUI();
@@ -51,7 +51,7 @@ void ConfigureAudio::SetConfiguration() {
ui->toggle_audio_stretching->setChecked(Settings::values.enable_audio_stretching.GetValue());
if (!Settings::configuring_global) {
if (!Settings::IsConfiguringGlobal()) {
if (Settings::values.volume.UsingGlobal()) {
ui->volume_combo_box->setCurrentIndex(0);
ui->volume_slider->setEnabled(false);
@@ -99,7 +99,7 @@ void ConfigureAudio::SetVolumeIndicatorText(int percentage) {
}
void ConfigureAudio::ApplyConfiguration() {
if (Settings::configuring_global) {
if (Settings::IsConfiguringGlobal()) {
Settings::values.sink_id =
ui->output_sink_combo_box->itemText(ui->output_sink_combo_box->currentIndex())
.toStdString();
@@ -165,7 +165,7 @@ void ConfigureAudio::RetranslateUI() {
}
void ConfigureAudio::SetupPerGameUI() {
if (Settings::configuring_global) {
if (Settings::IsConfiguringGlobal()) {
ui->volume_slider->setEnabled(Settings::values.volume.UsingGlobal());
ui->toggle_audio_stretching->setEnabled(
Settings::values.enable_audio_stretching.UsingGlobal());


@@ -15,7 +15,7 @@
ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry,
InputCommon::InputSubsystem* input_subsystem)
: QDialog(parent), ui(new Ui::ConfigureDialog), registry(registry) {
Settings::configuring_global = true;
Settings::SetConfiguringGlobal(true);
ui->setupUi(this);
ui->hotkeysTab->Populate(registry);

Some files were not shown because too many files have changed in this diff.