Compare commits


124 Commits

Author SHA1 Message Date
greggameplayer
a3f22ebc18 last clang-format fix 2018-06-04 18:51:44 +02:00
greggameplayer
8995ad6f29 delete one other trailing whitespace 2018-06-04 18:44:45 +02:00
greggameplayer
241d3c1473 fix some clang-format 2018-06-04 18:36:11 +02:00
greggameplayer
b4043ea231 delete trailing whitespace 2018-06-04 00:07:02 +02:00
greggameplayer
0f7b1a2818 fix clang-format 2018-06-03 22:52:31 +02:00
greggameplayer
50115fefa6 Add some IoctlCommand with their params to nvhost_gpu 2018-06-03 22:49:43 +02:00
bunnei
bb9d39b8fe Merge pull request #494 from bunnei/shader-tex
gl_shader_decompiler: Implement TEX, fixes for TEXS.
2018-06-03 12:05:38 -04:00
bunnei
27c0f9e02d Merge pull request #495 from bunnei/improve-rro
gl_shader_decompiler: Implement RRO as a register move.
2018-06-03 12:05:26 -04:00
bunnei
41faeeeb03 Merge pull request #484 from mailwl/nvhost-nvdec
Services/nvdrv: add '/dev/nvhost-nvdec' device
2018-06-03 11:18:00 -04:00
bunnei
63270e588b Merge pull request #496 from Subv/waitprocesswidekey_timeout
Kernel/Threads: A thread waking up by timeout from a WaitProcessWideKey may already have an assigned lock owner.
2018-06-03 11:15:49 -04:00
bunnei
e54ea773fc gl_shader_decompiler: Implement RRO as a register move. 2018-06-03 11:14:31 -04:00
bunnei
0d64ddc6dd Merge pull request #497 from Subv/dxn1
GPU: Implemented the DXN1 (BC4) texture format.
2018-06-03 11:05:04 -04:00
Subv
9cd87a6352 Kernel/Threads: A thread waking up by timeout from a WaitProcessWideKey may already have an assigned lock owner.
This situation may happen like so:
- Thread 1 with low priority calls WaitProcessWideKey with timeout.
- Thread 2 with high priority calls WaitProcessWideKey without timeout.
- Thread 3 calls SignalProcessWideKey:
  - Thread 2 acquires the lock and awakens.
  - Thread 1 can't acquire the lock and is put to sleep with the lock owner being Thread 2.
- Thread 1's timeout expires, with the lock owner still being set to Thread 2.
2018-06-02 14:06:35 -05:00
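The fix for this race lands in the thread wakeup callback further down in this diff: rather than asserting that a timed-out condition-variable waiter has no lock owner, the waiter is removed from that owner's waiter list again. A minimal sketch of the idea, using illustrative stand-in types rather than yuzu's actual kernel classes:

#include <algorithm>
#include <vector>

// Illustrative stand-ins for the kernel's thread/mutex bookkeeping (not yuzu's real classes).
struct Thread {
    Thread* lock_owner = nullptr;            // thread holding the mutex this thread waits on
    std::vector<Thread*> wait_mutex_threads; // threads waiting on mutexes this thread holds

    void RemoveMutexWaiter(Thread* waiter) {
        wait_mutex_threads.erase(
            std::remove(wait_mutex_threads.begin(), wait_mutex_threads.end(), waiter),
            wait_mutex_threads.end());
        waiter->lock_owner = nullptr;
    }
};

// Runs when a condition-variable wait times out. Before this change the code asserted
// lock_owner == nullptr; after it, a waiter that was already queued on a mutex (because
// SignalProcessWideKey ran first but the mutex was still held) is simply detached again.
void OnCondvarWaitTimeout(Thread& thread) {
    if (thread.lock_owner != nullptr) {
        thread.lock_owner->RemoveMutexWaiter(&thread);
    }
    // ...then resume the thread with a Timeout result.
}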
Subv
99f9d47d16 GPU: Implemented the DXN1 (BC4) texture format. 2018-06-02 13:17:09 -05:00
bunnei
bbbe34429e Merge pull request #492 from mailwl/time
Service/time: implement posix time to calendar conversion
2018-06-01 10:14:20 -04:00
mailwl
11568c2ea3 Service/time: implement posix time to calendar conversion 2018-06-01 09:40:28 +03:00
bunnei
888eb345c0 gl_shader_decompiler: Implement TEX instruction. 2018-05-31 23:36:45 -04:00
bunnei
4c727d0ba8 gl_shader_decompiler: Support multi-destination for TEXS. 2018-05-31 22:57:32 -04:00
bunnei
bdd68fc210 Merge pull request #488 from Subv/thread_masks
Kernel/SVC: Corrected the behavior of svcSetThreadCoreMask for core values -2 and -3.
2018-05-31 18:22:18 -04:00
bunnei
f1bded1270 Merge pull request #491 from bunnei/rgba16f
gl_rasterizer_cache: Implement PixelFormat RGBA16F.
2018-05-31 18:16:32 -04:00
bunnei
49309b5848 gl_rasterizer_cache: Assert that component type is UNorm or format is RGBA16F. 2018-05-30 22:50:41 -04:00
Subv
c02d7c8ce7 Kernel/Thread: Corrected a typo that caused the affinity mask to never be changed. 2018-05-30 21:36:29 -05:00
Subv
3957b0c34e Kernel/SVC: Support special core values -2 and -3 in svcSetThreadCoreMask.
Also added some proper error handling.
2018-05-30 21:36:29 -05:00
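The new validation appears in the svc.cpp hunk near the end of this diff. A condensed sketch of the same core/mask rules follows; the -2/-3 sentinel meanings are taken from the commit title and that hunk, while the helper itself and its names are purely illustrative:

#include <cstdint>

constexpr std::uint32_t NUM_CPU_CORES = 4;
// Sentinels: -2 = use the process' ideal core, -3 = keep the current ideal core and only
// change the affinity mask. Names here are illustrative.
constexpr std::uint32_t UseProcessIdealCore = static_cast<std::uint32_t>(-2);
constexpr std::uint32_t OnlyChangeMask = static_cast<std::uint32_t>(-3);

enum class CoreMaskError { None, InvalidCombination, InvalidProcessorId };

// Normalizes and validates the (core, mask) pair along the lines of the new handler.
CoreMaskError NormalizeCoreMask(std::uint32_t& core, std::uint64_t& mask,
                                std::uint32_t current_ideal_core,
                                std::uint32_t process_ideal_core) {
    if (core == UseProcessIdealCore) {
        core = process_ideal_core; // fall back to the ideal core from the process' exheader
        mask = 1ULL << core;
    }
    if (mask == 0) {
        return CoreMaskError::InvalidCombination;
    }
    if (core == OnlyChangeMask) {
        core = current_ideal_core; // keep the ideal core, only the mask changes
    } else if (core >= NUM_CPU_CORES && core != static_cast<std::uint32_t>(-1)) {
        return CoreMaskError::InvalidProcessorId;
    }
    // The chosen core must be enabled in the mask.
    if (core < NUM_CPU_CORES && (mask & (1ULL << core)) == 0) {
        return CoreMaskError::InvalidCombination;
    }
    return CoreMaskError::None;
}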
bunnei
ca5a4a704b gl_rasterizer_cache: Implement PixelFormat RGBA16F. 2018-05-30 22:24:07 -04:00
bunnei
15086a22be Merge pull request #489 from Subv/vertexid
Shaders: Implemented reading the gl_InstanceID and gl_VertexID variables in the vertex shader.
2018-05-30 14:10:48 -04:00
greggameplayer
94fecef137 add IPC CommandType & Some HID FunctionInfo (#487)
* add some CommandType

* add some hid FunctionInfo

* add some other HID FunctionInfo

* delete non useful comments
2018-05-30 14:09:21 -04:00
Subv
d1f9c750a6 Kernel/Thread: Corrected a typo in an assert about the processor id. 2018-05-30 11:32:46 -05:00
Subv
99f12b05fa Shaders: Implemented reading the gl_InstanceID and gl_VertexID variables in the vertex shader. 2018-05-30 10:58:03 -05:00
Sebastian Valle
8df011a57f Merge pull request #483 from bunnei/sonic
Several GPU fixes to boot Sonic Mania
2018-05-30 07:31:46 -05:00
mailwl
9a273bb23b Services/nvdrv: add '/dev/nvhost-nvdec' device 2018-05-30 12:49:28 +03:00
bunnei
6fcc7e9c36 gl_shader_decompiler: F2F_R instruction: Implement abs. 2018-05-29 23:52:54 -04:00
bunnei
c8e1383fa9 Merge pull request #482 from Subv/r8
GPU: Implemented the R8 texture format (0x1D)
2018-05-29 23:42:55 -04:00
bunnei
68937a662d gl_shader_decompiler: Partially implement F2F_R instruction. 2018-05-29 23:10:44 -04:00
Subv
734106dcb9 GPU: Implemented the R8 texture format (0x1D) 2018-05-29 21:49:37 -05:00
bunnei
6306655665 nvhost_ctrl: Stub out IocCtrlEventRegister. 2018-05-29 22:39:31 -04:00
bunnei
0658973a4e nvhost_ctrl: Stub out IocCtrlEventWaitAsyncCommand. 2018-05-29 22:35:41 -04:00
bunnei
0d843eaba6 gl_rasterize_cache: Invert order of tex format RGB565. 2018-05-29 22:16:18 -04:00
bunnei
5a763e8a5a Merge pull request #480 from mailwl/bcat
Service/BCAT: add module and services
2018-05-29 19:45:23 -04:00
greggameplayer
220d4672df add all the known TextureFormat (#474) 2018-05-28 19:26:17 -04:00
mailwl
7757cc1a7f Service/BCAT: add module and services 2018-05-28 16:46:56 +03:00
bunnei
d809f65827 Merge pull request #472 from bunnei/greater-equal
gl_shader_decompiler: Implement GetPredicateComparison GreaterEqual.
2018-05-27 12:14:30 -04:00
bunnei
7f155ba713 Merge pull request #476 from Subv/a1bgr5
GPU: Implemented the A1B5G5R5 texture format (0x14)
2018-05-27 12:14:08 -04:00
bunnei
7029daa32e Merge pull request #475 from ogniK5377/nvos-getconfig
NvOsGetConfigU32 should return null instead of 0 for default output value
2018-05-27 12:10:07 -04:00
bunnei
15c388e0d6 Merge pull request #473 from bunnei/get-display-version
am: Stub IApplicationFunctions GetDisplayVersion.
2018-05-27 12:00:32 -04:00
Sebastian Valle
9e30f5574f Merge pull request #471 from bunnei/fmnmx
shader_bytecode: Implement other variants of FMNMX.
2018-05-27 09:04:47 -05:00
Subv
7ddc872b52 GPU: Implemented the A1B5G5R5 texture format (0x14) 2018-05-27 09:02:05 -05:00
David Marcec
6138075df0 NvOsGetConfigU32 should return null instead of 0 for default output 2018-05-26 17:48:09 -07:00
bunnei
0d681f7a7a am: Stub IApplicationFunctions GetDisplayVersion. 2018-05-26 00:21:59 -04:00
bunnei
c23ce3365d gl_shader_decompiler: Implement GetPredicateComparison GreaterEqual. 2018-05-25 23:21:29 -04:00
bunnei
ee53688ca7 shader_bytecode: Implement other variants of FMNMX. 2018-05-25 23:18:50 -04:00
greggameplayer
b16e5c6a81 Add & correct miscellaneous things (#470)
* add some InfoType

* correct OpenApplicationProxy cmd number

* add IDisplayController functions

* fix clang-format

* add more system languages
2018-05-25 22:31:54 -04:00
bunnei
87f21657f8 Merge pull request #466 from mailwl/nv-timeout
Stub NVGPU_IOCTL_CHANNEL_SET_TIMEOUT
2018-05-25 22:31:06 -04:00
David
e6df4b37db GetAudioRendererWorkBufferSize impl (#465)
* GetAudioRendererWorkBufferSize impl

Impl of GetAudioRendererWorkBufferSize based on RE, if this can be cleaned up, please contribute!

* Naming conventions

* Removed unneeded placeholder

* lioncache changes

* fixed const

* switched to Common::AlignUp
2018-05-25 22:30:02 -04:00
bunnei
aee356bd10 Merge pull request #468 from Subv/compound_preds
Shader: Implemented compound predicates in the fset and fsetp instructions
2018-05-25 22:28:47 -04:00
bunnei
cd7665218d Merge pull request #469 from Subv/channel_rebind
GPU: Allow command lists to rebind a channel to another engine in the middle of the command list.
2018-05-25 22:27:58 -04:00
Subv
e2cdf54177 Shader: Implemented compound predicates in fset.
You can specify a predicate in the fset instruction:

Result = ((Value1 Comp Value2) OP P0) ? 1.0 : 0.0;
2018-05-24 17:39:59 -05:00
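Read concretely, the formula above combines a float comparison with an existing predicate and materializes the result as 1.0 or 0.0. A small host-side illustration (a hypothetical helper, not the decompiler's actual output):

#include <functional>

// fset with a compound predicate: Result = ((Value1 Comp Value2) OP P0) ? 1.0 : 0.0
float EvaluateFset(float value1, float value2, bool p0,
                   const std::function<bool(float, float)>& comp, // e.g. std::less<float>{}
                   const std::function<bool(bool, bool)>& op) {   // e.g. std::logical_and<bool>{}
    return op(comp(value1, value2), p0) ? 1.0f : 0.0f;
}

// Example: EvaluateFset(1.0f, 2.0f, true, std::less<float>{}, std::logical_and<bool>{}) returns 1.0f.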
David
e3a92b09ba Stubbed NVGPU_GPU_IOCTL_ZBC_SET_TABLE (#463)
We have no clue on what this actually does yet so stubbing it since it's just input only should be fine for now
2018-05-24 18:36:12 -04:00
Subv
e2db7a83f6 GPU: Allow command lists to rebind a channel to another engine in the middle of the command list. 2018-05-24 17:32:46 -05:00
Subv
126270d963 Shader: Implemented compound predicates in fsetp.
You can specify three predicates in an fsetp instruction:

P1 = (Value1 Comp Value2) OP P0;
P2 = !(Value1 Comp Value2) OP P0;
2018-05-24 17:22:36 -05:00
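The two predicate outputs described above can be illustrated the same way; Comp and OP are fixed to greater-than and AND here purely as an example, and the helper name is illustrative:

#include <utility>

// fsetp with a compound predicate writes two results:
//   P1 =  (Value1 Comp Value2) OP P0
//   P2 = !(Value1 Comp Value2) OP P0
std::pair<bool, bool> EvaluateFsetp(float value1, float value2, bool p0) {
    const bool comparison = value1 > value2;      // Comp, fixed to greater-than for illustration
    return {comparison && p0, !comparison && p0}; // OP, fixed to AND for illustration
}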
mailwl
e6a87428ae Stub NVGPU_IOCTL_CHANNEL_SET_TIMEOUT
Used in Nintendo Labo ToyCon 1&2
2018-05-24 16:49:34 +03:00
bunnei
55e6296e71 Merge pull request #464 from bunnei/fix-msvc
yuzu_cmd: Fix project for latest msvc.
2018-05-24 09:01:07 -04:00
bunnei
1ce7942dc2 yuzu_cmd: Fix project for latest msvc. 2018-05-23 21:51:49 -04:00
bunnei
6b6287dda0 Merge pull request #462 from ogniK5377/hid-fix
Fix deadlocks caused from HID having too many layouts
2018-05-23 20:50:12 -04:00
David Marcec
c74d24f841 Fix deadlocks caused from HID having too many layouts
Games such as SMO deadlock if we have more than 2 layouts
2018-05-23 16:00:25 -07:00
bunnei
4cb92b776c Merge pull request #460 from greggameplayer/patch-6
Add & correct some error modules
2018-05-23 17:13:18 -04:00
bunnei
a55f112cb1 Merge pull request #459 from greggameplayer/patch-5
Add ioctl commands with their params and size check
2018-05-23 17:12:56 -04:00
bunnei
5cdc277dd2 Merge pull request #461 from lioncash/dynarmic
externals: Update dynarmic
2018-05-23 17:12:40 -04:00
Lioncash
74efdd6928 externals: Update dynarmic
Updates dynarmic to revision 990a569b7a5f2518fe08682f5ebf8536e5388d66
2018-05-23 15:26:23 -04:00
bunnei
3825b703fa Merge pull request #454 from Subv/signal_processwide
Kernel/SVC: Signal the highest priority threads first in svcSignalProcessWideKey
2018-05-23 10:28:23 -04:00
greggameplayer
1efb81a61d Add & correct some error modules 2018-05-23 14:22:42 +02:00
greggameplayer
3c26b7179d change some functions
according to the changes made previously
2018-05-23 14:09:24 +02:00
greggameplayer
8c648b59cd correct placement and add size check 2018-05-23 12:34:42 +02:00
greggameplayer
f217d6c66f Add ioctl commands with their params and size check 2018-05-23 12:32:37 +02:00
David
58d9078742 Implemented NVHOST_IOCTL_CHANNEL_GET_WAITBASE (#440)
* Implemented NVHOST_IOCTL_CHANNEL_GET_WAITBASE

struct + 4 seems to be hard coded at 0 and struct + 0 seems to be ignored?

* IocGetWaitbase -> IocChannelGetWaitbaseCommand

* Added super late fixes
2018-05-22 17:41:19 -04:00
bunnei
58857b9f46 Merge pull request #456 from Subv/unmap_buffer
Implemented nvhost-as-gpu's UnmapBuffer and nvmap's Free ioctls.
2018-05-20 23:54:50 -04:00
greggameplayer
c6eaf0b2cf Correct audio command numbers & add or rename some functions (#455)
* Add unknown function at the number command 2

* correct audout:u commands numbers

* correct audrec:u cmd number & add Unknown function

* correct IAudioDevice command numbers

* correct codecctl cmd numbers & rename the 8 function

* correct place of unknown function & fix clang-format
2018-05-20 23:48:44 -04:00
bunnei
693f78e6c2 Merge pull request #457 from Subv/mutex_waiters
Mutex: Do not assert when the mutex waiting threads list isn't empty on mutex release.
2018-05-20 23:44:44 -04:00
bunnei
898f0fa029 Merge pull request #458 from Subv/fmnmx
Shaders: Implemented the FMNMX shader instruction.
2018-05-20 23:44:07 -04:00
bunnei
ff54287a73 Merge pull request #445 from greggameplayer/patch-2
Properly rename functions of Fatal Module & add ThrowFatal to this module
2018-05-20 23:42:57 -04:00
Sebastian Valle
882111c4f2 Merge pull request #453 from Subv/thread_callstack
Qt/WaitTree: Display the callstack for each thread in the wait tree widget
2018-05-20 20:01:08 -05:00
Sebastian Valle
6486544e09 Merge pull request #452 from Subv/psetp
ShadersDecompiler: Added decoding for the PSETP instruction.
2018-05-20 20:00:55 -05:00
Sebastian Valle
2dbfcd32d7 Merge pull request #451 from Subv/gl_array_size
GLRenderer: Remove unused vertex buffer and increase the size of the stream buffer to 128 MB.
2018-05-20 20:00:40 -05:00
Subv
8440cef223 Shaders: Implemented the FMNMX shader instruction. 2018-05-20 17:53:06 -05:00
Subv
fd500d3da6 Mutex: Do not assert when the mutex waiting threads list isn't empty on mutex release.
A thread may own multiple mutexes at the same time, and only release one of them while other threads are waiting for the other mutexes.
2018-05-20 14:41:58 -05:00
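The point is that the owner's waiter list mixes waiters for every mutex it holds, so releasing one mutex can legitimately leave the list non-empty. A small sketch under that assumption, with illustrative types rather than the kernel's:

#include <cstdint>
#include <vector>

struct Waiter {
    std::uint64_t mutex_wait_address; // guest address of the mutex this thread is waiting on
};

struct Owner {
    std::vector<Waiter*> wait_mutex_threads; // waiters across *all* mutexes this thread holds
};

// Releasing one mutex only considers waiters registered for that address; waiters on the
// owner's other mutexes stay queued, which is why an "empty list" assert would be wrong.
Waiter* PickWaiterForRelease(Owner& owner, std::uint64_t released_address) {
    for (Waiter* waiter : owner.wait_mutex_threads) {
        if (waiter->mutex_wait_address == released_address) {
            return waiter; // the real kernel picks the highest-priority matching waiter
        }
    }
    return nullptr; // nobody waits on this mutex; the list may still be non-empty
}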
Subv
525492428d GPU: Implemented the nvmap Free ioctl.
It releases a reference to an nvmap object
2018-05-20 14:25:57 -05:00
Subv
72b5c448cf GPU: Implemented nvhost-as-gpu's UnmapBuffer ioctl.
It removes a mapping previously created with the MapBufferEx ioctl.
2018-05-20 14:25:56 -05:00
Sebastian Valle
03388c3071 Merge pull request #450 from Subv/shader_link_error
GLRenderer: Log the shader source code when program linking fails.
2018-05-20 12:57:32 -05:00
Sebastian Valle
353e1dd7e4 Merge pull request #443 from ogniK5377/ipc-500
Added IPC RequestWithContext & ControlWithContext
2018-05-19 17:03:30 -05:00
greggameplayer
a215f63235 Add and correct some Error Modules (#444)
* Add and correct some Error Modules
2018-05-19 17:02:24 -05:00
Sebastian Valle
dc26601860 Merge pull request #442 from Hexagon12/nfp-service-names
Updated nfp to have more service names
2018-05-19 17:01:23 -05:00
Subv
2a35a36251 Kernel/SVC: Signal the highest priority threads first in svcSignalProcessWideKey. 2018-05-19 16:58:30 -05:00
Subv
c74f2555b6 Kernel/Threads: Reschedule the proper core when operating on that core's threads. 2018-05-19 16:57:44 -05:00
Subv
fab3dd98fe SVC: Removed unused WaitSynchronization1 function 2018-05-19 16:56:33 -05:00
Subv
c50393e066 Qt/WaitTree: Display the callstack for each thread in the wait tree widget. 2018-05-19 16:52:49 -05:00
Subv
a056d5ad8c ShadersDecompiler: Added decoding for the PSETP instruction. 2018-05-19 11:41:14 -05:00
Subv
98b143c2d6 GLRenderer: Remove unused hw_vao_enabled_attributes variable. 2018-05-19 11:36:38 -05:00
Subv
370ab5df9b GLRenderer: Remove unused vertex buffer and increase the size of the stream buffer to 128 MB.
The stream buffer is where all the vertex data is copied; some games require it to be much bigger than the 4 MB we used to have.
2018-05-19 11:36:09 -05:00
Subv
21959ddfef GLRenderer: Log the shader source code when program linking fails. 2018-05-19 11:19:34 -05:00
greggameplayer
abe79b2724 rename fatal:u functions & add ThrowFatal 2018-05-18 23:32:22 +02:00
greggameplayer
536cfb13e6 Properly update fatal.h void name 2018-05-18 23:30:56 +02:00
greggameplayer
e35cfc1b03 Properly rename fatal module functions 2018-05-18 23:28:30 +02:00
David Marcec
fd86cdb2e2 Added RequestWithContext & ControlWithContext 2018-05-17 14:03:52 -07:00
Hexagon12
0984e9d601 Updated nfp with more service names 2018-05-13 13:08:58 +03:00
bunnei
1b5c02fc37 Merge pull request #436 from bunnei/multi-core
Initial support for multi-core
2018-05-11 12:59:23 -04:00
bunnei
e07218906d Merge pull request #439 from ogniK5377/GetTPCMasks
More accurate GetTPCMasks impl
2018-05-11 12:57:20 -04:00
David Marcec
e7b0e8a3cc More accurate GetTPCMasks impl 2018-05-10 21:01:39 -07:00
bunnei
811dae12f9 core: Add several missing docstrings. 2018-05-10 19:34:54 -04:00
bunnei
46ec9a9bc9 thread: Rename mask to affinity_masks. 2018-05-10 19:34:53 -04:00
bunnei
edc52250b8 core: Run all CPU cores separately, even in single-thread mode. 2018-05-10 19:34:53 -04:00
bunnei
fbd7afefaa thread: Support core change on ResumeFromWait and improve ChangeCore. 2018-05-10 19:34:53 -04:00
bunnei
91af2f94e8 scheduler: Protect scheduling functions with a global mutex. 2018-05-10 19:34:52 -04:00
bunnei
e6671190a5 wait_tree: Add ideal core and affinity mask. 2018-05-10 19:34:52 -04:00
bunnei
4822765fef thread: Initialize ideal_core and mask members. 2018-05-10 19:34:52 -04:00
bunnei
8aa5d25f82 threading: Reschedule only on cores that are necessary. 2018-05-10 19:34:52 -04:00
bunnei
d6e3cd9a17 svc: Implement GetThreadCoreMask and SetThreadCoreMask. 2018-05-10 19:34:51 -04:00
bunnei
6ea8b3ef60 thread: Implement ChangeCore function. 2018-05-10 19:34:50 -04:00
bunnei
1c36f2a798 svc: SignalProcessWideKey should apply to all cores. 2018-05-10 19:34:49 -04:00
bunnei
6a890023e9 svc: Implement GetCurrentProcessorNumber. 2018-05-10 19:34:49 -04:00
bunnei
5c0421ebd8 wait_tree: Show all threads on all schedulers. 2018-05-10 19:34:48 -04:00
bunnei
9bf2a428f9 core: Add a configuration setting for use_multi_core. 2018-05-10 19:34:47 -04:00
bunnei
cba69fdcd4 core: Support session close with multicore. 2018-05-10 19:34:47 -04:00
bunnei
a434fdcb10 core: Implement multicore support. 2018-05-10 19:34:46 -04:00
bunnei
9776ff9179 core: Create a thread for each CPU core, keep in lock-step with a barrier. 2018-05-10 19:34:46 -04:00
bunnei
5590245930 core: Move common CPU core things to its own class. 2018-05-10 19:34:46 -04:00
82 changed files with 1975 additions and 432 deletions


@@ -35,6 +35,7 @@ namespace Log {
SUB(Service, AM) \
SUB(Service, AOC) \
SUB(Service, APM) \
SUB(Service, BCAT) \
SUB(Service, Fatal) \
SUB(Service, Friend) \
SUB(Service, FS) \


@@ -55,6 +55,7 @@ enum class Class : ClassType {
Service_AOC, ///< The AOC (AddOn Content) service
Service_APM, ///< The APM (Performance) service
Service_Audio, ///< The Audio (Audio control) service
Service_BCAT, ///< The BCAT service
Service_Fatal, ///< The Fatal service
Service_Friend, ///< The friend service
Service_FS, ///< The FS (Filesystem) service


@@ -4,6 +4,8 @@ add_library(core STATIC
arm/unicorn/arm_unicorn.h
core.cpp
core.h
core_cpu.cpp
core_cpu.h
core_timing.cpp
core_timing.h
file_sys/directory.h
@@ -122,6 +124,10 @@ add_library(core STATIC
hle/service/audio/audren_u.h
hle/service/audio/codecctl.cpp
hle/service/audio/codecctl.h
hle/service/bcat/module.cpp
hle/service/bcat/module.h
hle/service/bcat/bcat.cpp
hle/service/bcat/bcat.h
hle/service/fatal/fatal.cpp
hle/service/fatal/fatal.h
hle/service/fatal/fatal_p.cpp
@@ -169,6 +175,8 @@ add_library(core STATIC
hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
hle/service/nvdrv/devices/nvhost_gpu.cpp
hle/service/nvdrv/devices/nvhost_gpu.h
hle/service/nvdrv/devices/nvhost_nvdec.cpp
hle/service/nvdrv/devices/nvhost_nvdec.h
hle/service/nvdrv/devices/nvmap.cpp
hle/service/nvdrv/devices/nvmap.h
hle/service/nvdrv/interface.cpp


@@ -52,7 +52,7 @@ static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) {
static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
void* user_data) {
ARM_Interface::ThreadContext ctx{};
Core::CPU().SaveContext(ctx);
Core::CurrentArmInterface().SaveContext(ctx);
ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
ctx.pc, ctx.cpu_registers[30]);
return {};


@@ -5,10 +5,6 @@
#include <memory>
#include <utility>
#include "common/logging/log.h"
#ifdef ARCHITECTURE_x86_64
#include "core/arm/dynarmic/arm_dynarmic.h"
#endif
#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/gdbstub/gdbstub.h"
@@ -31,11 +27,31 @@ namespace Core {
System::~System() = default;
/// Runs a CPU core while the system is powered on
static void RunCpuCore(std::shared_ptr<Cpu> cpu_state) {
while (Core::System().GetInstance().IsPoweredOn()) {
cpu_state->RunLoop(true);
}
}
Cpu& System::CurrentCpuCore() {
// If multicore is enabled, use host thread to figure out the current CPU core
if (Settings::values.use_multi_core) {
const auto& search = thread_to_cpu.find(std::this_thread::get_id());
ASSERT(search != thread_to_cpu.end());
ASSERT(search->second);
return *search->second;
}
// Otherwise, use single-threaded mode active_core variable
return *cpu_cores[active_core];
}
System::ResultStatus System::RunLoop(bool tight_loop) {
status = ResultStatus::Success;
if (!cpu_core) {
return ResultStatus::ErrorNotInitialized;
}
// Update thread_to_cpu in case Core 0 is run from a different host thread
thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
if (GDBStub::IsServerEnabled()) {
GDBStub::HandlePacket();
@@ -52,25 +68,14 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
}
}
// If we don't have a currently active thread then don't execute instructions,
// instead advance to the next event and try to yield to the next thread
if (Kernel::GetCurrentThread() == nullptr) {
NGLOG_TRACE(Core_ARM, "Idling");
CoreTiming::Idle();
CoreTiming::Advance();
PrepareReschedule();
} else {
CoreTiming::Advance();
if (tight_loop) {
cpu_core->Run();
} else {
cpu_core->Step();
for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
cpu_cores[active_core]->RunLoop(tight_loop);
if (Settings::values.use_multi_core) {
// Cores 1-3 are run on other threads in this mode
break;
}
}
HW::Update();
Reschedule();
return status;
}
@@ -133,21 +138,26 @@ System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& file
}
void System::PrepareReschedule() {
cpu_core->PrepareReschedule();
reschedule_pending = true;
CurrentCpuCore().PrepareReschedule();
}
PerfStats::Results System::GetAndResetPerfStats() {
return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs());
}
void System::Reschedule() {
if (!reschedule_pending) {
return;
}
const std::shared_ptr<Kernel::Scheduler>& System::Scheduler(size_t core_index) {
ASSERT(core_index < NUM_CPU_CORES);
return cpu_cores[core_index]->Scheduler();
}
reschedule_pending = false;
Core::System::GetInstance().Scheduler().Reschedule();
ARM_Interface& System::ArmInterface(size_t core_index) {
ASSERT(core_index < NUM_CPU_CORES);
return cpu_cores[core_index]->ArmInterface();
}
Cpu& System::CpuCore(size_t core_index) {
ASSERT(core_index < NUM_CPU_CORES);
return *cpu_cores[core_index];
}
System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
@@ -157,26 +167,17 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
current_process = Kernel::Process::Create("main");
if (Settings::values.use_cpu_jit) {
#ifdef ARCHITECTURE_x86_64
cpu_core = std::make_shared<ARM_Dynarmic>();
#else
cpu_core = std::make_shared<ARM_Unicorn>();
NGLOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
#endif
} else {
cpu_core = std::make_shared<ARM_Unicorn>();
cpu_barrier = std::make_shared<CpuBarrier>();
for (size_t index = 0; index < cpu_cores.size(); ++index) {
cpu_cores[index] = std::make_shared<Cpu>(cpu_barrier, index);
}
gpu_core = std::make_unique<Tegra::GPU>();
telemetry_session = std::make_unique<Core::TelemetrySession>();
service_manager = std::make_shared<Service::SM::ServiceManager>();
HW::Init();
Kernel::Init(system_mode);
scheduler = std::make_unique<Kernel::Scheduler>(cpu_core.get());
Service::Init(service_manager);
GDBStub::Init();
@@ -184,6 +185,17 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
return ResultStatus::ErrorVideoCore;
}
// Create threads for CPU cores 1-3, and build thread_to_cpu map
// CPU core 0 is run on the main thread
thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
if (Settings::values.use_multi_core) {
for (size_t index = 0; index < cpu_core_threads.size(); ++index) {
cpu_core_threads[index] =
std::make_unique<std::thread>(RunCpuCore, cpu_cores[index + 1]);
thread_to_cpu[cpu_core_threads[index]->get_id()] = cpu_cores[index + 1];
}
}
NGLOG_DEBUG(Core, "Initialized OK");
// Reset counters and set time origin to current frame
@@ -207,15 +219,30 @@ void System::Shutdown() {
VideoCore::Shutdown();
GDBStub::Shutdown();
Service::Shutdown();
scheduler.reset();
Kernel::Shutdown();
HW::Shutdown();
service_manager.reset();
telemetry_session.reset();
gpu_core.reset();
cpu_core.reset();
// Close all CPU/threading state
cpu_barrier->NotifyEnd();
if (Settings::values.use_multi_core) {
for (auto& thread : cpu_core_threads) {
thread->join();
thread.reset();
}
}
thread_to_cpu.clear();
for (auto& cpu_core : cpu_cores) {
cpu_core.reset();
}
cpu_barrier.reset();
// Close core timing
CoreTiming::Shutdown();
// Close app loader
app_loader.reset();
NGLOG_DEBUG(Core, "Shutdown OK");


@@ -4,9 +4,12 @@
#pragma once
#include <array>
#include <memory>
#include <string>
#include <thread>
#include "common/common_types.h"
#include "core/core_cpu.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/scheduler.h"
#include "core/loader/loader.h"
@@ -89,7 +92,7 @@ public:
* @returns True if the emulated system is powered on, otherwise false.
*/
bool IsPoweredOn() const {
return cpu_core != nullptr;
return cpu_barrier && cpu_barrier->IsAlive();
}
/**
@@ -103,24 +106,34 @@ public:
/// Prepare the core emulation for a reschedule
void PrepareReschedule();
/// Gets and resets core performance statistics
PerfStats::Results GetAndResetPerfStats();
/**
* Gets a reference to the emulated CPU.
* @returns A reference to the emulated CPU.
*/
ARM_Interface& CPU() {
return *cpu_core;
/// Gets an ARM interface to the CPU core that is currently running
ARM_Interface& CurrentArmInterface() {
return CurrentCpuCore().ArmInterface();
}
/// Gets an ARM interface to the CPU core with the specified index
ARM_Interface& ArmInterface(size_t core_index);
/// Gets a CPU interface to the CPU core with the specified index
Cpu& CpuCore(size_t core_index);
/// Gets the GPU interface
Tegra::GPU& GPU() {
return *gpu_core;
}
Kernel::Scheduler& Scheduler() {
return *scheduler;
/// Gets the scheduler for the CPU core that is currently running
Kernel::Scheduler& CurrentScheduler() {
return *CurrentCpuCore().Scheduler();
}
/// Gets the scheduler for the CPU core with the specified index
const std::shared_ptr<Kernel::Scheduler>& Scheduler(size_t core_index);
/// Gets the current process
Kernel::SharedPtr<Kernel::Process>& CurrentProcess() {
return current_process;
}
@@ -155,6 +168,9 @@ public:
}
private:
/// Returns the currently running CPU core
Cpu& CurrentCpuCore();
/**
* Initialize the emulated system.
* @param emu_window Pointer to the host-system window used for video output and keyboard input.
@@ -163,22 +179,15 @@ private:
*/
ResultStatus Init(EmuWindow* emu_window, u32 system_mode);
/// Reschedule the core emulation
void Reschedule();
/// AppLoader used to load the current executing application
std::unique_ptr<Loader::AppLoader> app_loader;
std::shared_ptr<ARM_Interface> cpu_core;
std::unique_ptr<Kernel::Scheduler> scheduler;
std::unique_ptr<Tegra::GPU> gpu_core;
std::shared_ptr<Tegra::DebugContext> debug_context;
Kernel::SharedPtr<Kernel::Process> current_process;
/// When true, signals that a reschedule should happen
bool reschedule_pending{};
std::shared_ptr<CpuBarrier> cpu_barrier;
std::array<std::shared_ptr<Cpu>, NUM_CPU_CORES> cpu_cores;
std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> cpu_core_threads;
size_t active_core{}; ///< Active core, only used in single thread mode
/// Service manager
std::shared_ptr<Service::SM::ServiceManager> service_manager;
@@ -190,10 +199,13 @@ private:
ResultStatus status = ResultStatus::Success;
std::string status_details = "";
/// Map of guest threads to CPU cores
std::map<std::thread::id, std::shared_ptr<Cpu>> thread_to_cpu;
};
inline ARM_Interface& CPU() {
return System::GetInstance().CPU();
inline ARM_Interface& CurrentArmInterface() {
return System::GetInstance().CurrentArmInterface();
}
inline TelemetrySession& Telemetry() {

src/core/core_cpu.cpp (new file, 119 lines)

@@ -0,0 +1,119 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <condition_variable>
#include <mutex>
#include "common/logging/log.h"
#ifdef ARCHITECTURE_x86_64
#include "core/arm/dynarmic/arm_dynarmic.h"
#endif
#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
#include "core/settings.h"
namespace Core {
void CpuBarrier::NotifyEnd() {
std::unique_lock<std::mutex> lock(mutex);
end = true;
condition.notify_all();
}
bool CpuBarrier::Rendezvous() {
if (!Settings::values.use_multi_core) {
// Meaningless when running in single-core mode
return true;
}
if (!end) {
std::unique_lock<std::mutex> lock(mutex);
--cores_waiting;
if (!cores_waiting) {
cores_waiting = NUM_CPU_CORES;
condition.notify_all();
return true;
}
condition.wait(lock);
return true;
}
return false;
}
Cpu::Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index)
: cpu_barrier{std::move(cpu_barrier)}, core_index{core_index} {
if (Settings::values.use_cpu_jit) {
#ifdef ARCHITECTURE_x86_64
arm_interface = std::make_shared<ARM_Dynarmic>();
#else
cpu_core = std::make_shared<ARM_Unicorn>();
NGLOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
#endif
} else {
arm_interface = std::make_shared<ARM_Unicorn>();
}
scheduler = std::make_shared<Kernel::Scheduler>(arm_interface.get());
}
void Cpu::RunLoop(bool tight_loop) {
// Wait for all other CPU cores to complete the previous slice, such that they run in lock-step
if (!cpu_barrier->Rendezvous()) {
// If rendezvous failed, session has been killed
return;
}
// If we don't have a currently active thread then don't execute instructions,
// instead advance to the next event and try to yield to the next thread
if (Kernel::GetCurrentThread() == nullptr) {
NGLOG_TRACE(Core, "Core-{} idling", core_index);
if (IsMainCore()) {
CoreTiming::Idle();
CoreTiming::Advance();
}
PrepareReschedule();
} else {
if (IsMainCore()) {
CoreTiming::Advance();
}
if (tight_loop) {
arm_interface->Run();
} else {
arm_interface->Step();
}
}
Reschedule();
}
void Cpu::SingleStep() {
return RunLoop(false);
}
void Cpu::PrepareReschedule() {
arm_interface->PrepareReschedule();
reschedule_pending = true;
}
void Cpu::Reschedule() {
if (!reschedule_pending) {
return;
}
reschedule_pending = false;
scheduler->Reschedule();
}
} // namespace Core

src/core/core_cpu.h (new file, 78 lines)

@@ -0,0 +1,78 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <string>
#include "common/common_types.h"
class ARM_Interface;
namespace Kernel {
class Scheduler;
}
namespace Core {
constexpr unsigned NUM_CPU_CORES{4};
class CpuBarrier {
public:
bool IsAlive() const {
return !end;
}
void NotifyEnd();
bool Rendezvous();
private:
unsigned cores_waiting{NUM_CPU_CORES};
std::mutex mutex;
std::condition_variable condition;
std::atomic<bool> end{};
};
class Cpu {
public:
Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index);
void RunLoop(bool tight_loop = true);
void SingleStep();
void PrepareReschedule();
ARM_Interface& ArmInterface() {
return *arm_interface;
}
const ARM_Interface& ArmInterface() const {
return *arm_interface;
}
const std::shared_ptr<Kernel::Scheduler>& Scheduler() const {
return scheduler;
}
bool IsMainCore() const {
return core_index == 0;
}
private:
void Reschedule();
std::shared_ptr<ARM_Interface> arm_interface;
std::shared_ptr<CpuBarrier> cpu_barrier;
std::shared_ptr<Kernel::Scheduler> scheduler;
bool reschedule_pending{};
size_t core_index;
};
} // namespace Core


@@ -598,11 +598,11 @@ static void ReadRegister() {
}
if (id <= SP_REGISTER) {
LongToGdbHex(reply, Core::CPU().GetReg(static_cast<int>(id)));
LongToGdbHex(reply, Core::CurrentArmInterface().GetReg(static_cast<int>(id)));
} else if (id == PC_REGISTER) {
LongToGdbHex(reply, Core::CPU().GetPC());
LongToGdbHex(reply, Core::CurrentArmInterface().GetPC());
} else if (id == CPSR_REGISTER) {
IntToGdbHex(reply, Core::CPU().GetCPSR());
IntToGdbHex(reply, Core::CurrentArmInterface().GetCPSR());
} else {
return SendReply("E01");
}
@@ -618,16 +618,16 @@ static void ReadRegisters() {
u8* bufptr = buffer;
for (int reg = 0; reg <= SP_REGISTER; reg++) {
LongToGdbHex(bufptr + reg * 16, Core::CPU().GetReg(reg));
LongToGdbHex(bufptr + reg * 16, Core::CurrentArmInterface().GetReg(reg));
}
bufptr += (32 * 16);
LongToGdbHex(bufptr, Core::CPU().GetPC());
LongToGdbHex(bufptr, Core::CurrentArmInterface().GetPC());
bufptr += 16;
IntToGdbHex(bufptr, Core::CPU().GetCPSR());
IntToGdbHex(bufptr, Core::CurrentArmInterface().GetCPSR());
bufptr += 8;
@@ -646,11 +646,11 @@ static void WriteRegister() {
}
if (id <= SP_REGISTER) {
Core::CPU().SetReg(id, GdbHexToLong(buffer_ptr));
Core::CurrentArmInterface().SetReg(id, GdbHexToLong(buffer_ptr));
} else if (id == PC_REGISTER) {
Core::CPU().SetPC(GdbHexToLong(buffer_ptr));
Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr));
} else if (id == CPSR_REGISTER) {
Core::CPU().SetCPSR(GdbHexToInt(buffer_ptr));
Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr));
} else {
return SendReply("E01");
}
@@ -667,11 +667,11 @@ static void WriteRegisters() {
for (int i = 0, reg = 0; reg <= CPSR_REGISTER; i++, reg++) {
if (reg <= SP_REGISTER) {
Core::CPU().SetReg(reg, GdbHexToLong(buffer_ptr + i * 16));
Core::CurrentArmInterface().SetReg(reg, GdbHexToLong(buffer_ptr + i * 16));
} else if (reg == PC_REGISTER) {
Core::CPU().SetPC(GdbHexToLong(buffer_ptr + i * 16));
Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr + i * 16));
} else if (reg == CPSR_REGISTER) {
Core::CPU().SetCPSR(GdbHexToInt(buffer_ptr + i * 16));
Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr + i * 16));
} else {
UNIMPLEMENTED();
}


@@ -29,9 +29,14 @@ enum class ControlCommand : u32 {
};
enum class CommandType : u32 {
Invalid = 0,
LegacyRequest = 1,
Close = 2,
LegacyControl = 3,
Request = 4,
Control = 5,
RequestWithContext = 6,
ControlWithContext = 7,
Unspecified,
};


@@ -21,7 +21,9 @@ enum {
// Confirmed Switch OS error codes
MisalignedAddress = 102,
InvalidProcessorId = 113,
InvalidHandle = 114,
InvalidCombination = 116,
Timeout = 117,
SynchronizationCanceled = 118,
TooLarge = 119,


@@ -110,7 +110,9 @@ void HLERequestContext::ParseCommandBuffer(u32_le* src_cmdbuf, bool incoming) {
// Padding to align to 16 bytes
rp.AlignWithPadding();
if (Session()->IsDomain() && (command_header->type == IPC::CommandType::Request || !incoming)) {
if (Session()->IsDomain() && ((command_header->type == IPC::CommandType::Request ||
command_header->type == IPC::CommandType::RequestWithContext) ||
!incoming)) {
// If this is an incoming message, only CommandType "Request" has a domain header
// All outgoing domain messages have the domain header, if only incoming has it
if (incoming || domain_message_header) {


@@ -104,7 +104,6 @@ ResultCode Mutex::Release(VAddr address) {
// There are no more threads waiting for the mutex, release it completely.
if (thread == nullptr) {
ASSERT(GetCurrentThread()->wait_mutex_threads.empty());
Memory::Write32(address, 0);
return RESULT_SUCCESS;
}


@@ -9,6 +9,8 @@
namespace Kernel {
std::mutex Scheduler::scheduler_mutex;
Scheduler::Scheduler(ARM_Interface* cpu_core) : cpu_core(cpu_core) {}
Scheduler::~Scheduler() {
@@ -18,6 +20,7 @@ Scheduler::~Scheduler() {
}
bool Scheduler::HaveReadyThreads() {
std::lock_guard<std::mutex> lock(scheduler_mutex);
return ready_queue.get_first() != nullptr;
}
@@ -90,6 +93,8 @@ void Scheduler::SwitchContext(Thread* new_thread) {
}
void Scheduler::Reschedule() {
std::lock_guard<std::mutex> lock(scheduler_mutex);
Thread* cur = GetCurrentThread();
Thread* next = PopNextReadyThread();
@@ -105,26 +110,36 @@ void Scheduler::Reschedule() {
}
void Scheduler::AddThread(SharedPtr<Thread> thread, u32 priority) {
std::lock_guard<std::mutex> lock(scheduler_mutex);
thread_list.push_back(thread);
ready_queue.prepare(priority);
}
void Scheduler::RemoveThread(Thread* thread) {
std::lock_guard<std::mutex> lock(scheduler_mutex);
thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
thread_list.end());
}
void Scheduler::ScheduleThread(Thread* thread, u32 priority) {
std::lock_guard<std::mutex> lock(scheduler_mutex);
ASSERT(thread->status == THREADSTATUS_READY);
ready_queue.push_back(priority, thread);
}
void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
std::lock_guard<std::mutex> lock(scheduler_mutex);
ASSERT(thread->status == THREADSTATUS_READY);
ready_queue.remove(priority, thread);
}
void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
std::lock_guard<std::mutex> lock(scheduler_mutex);
// If thread was ready, adjust queues
if (thread->status == THREADSTATUS_READY)
ready_queue.move(thread, thread->current_priority, priority);


@@ -4,6 +4,7 @@
#pragma once
#include <mutex>
#include <vector>
#include "common/common_types.h"
#include "common/thread_queue_list.h"
@@ -68,6 +69,8 @@ private:
SharedPtr<Thread> current_thread = nullptr;
ARM_Interface* cpu_core;
static std::mutex scheduler_mutex;
};
} // namespace Kernel


@@ -145,36 +145,6 @@ static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, SharedPtr<Thr
return true;
};
/// Wait for a kernel object to synchronize, timeout after the specified nanoseconds
static ResultCode WaitSynchronization1(
SharedPtr<WaitObject> object, Thread* thread, s64 nano_seconds = -1,
std::function<Thread::WakeupCallback> wakeup_callback = DefaultThreadWakeupCallback) {
if (!object) {
return ERR_INVALID_HANDLE;
}
if (object->ShouldWait(thread)) {
if (nano_seconds == 0) {
return RESULT_TIMEOUT;
}
thread->wait_objects = {object};
object->AddWaitingThread(thread);
thread->status = THREADSTATUS_WAIT_SYNCH_ANY;
// Create an event to wake the thread up after the specified nanosecond delay has passed
thread->WakeAfterDelay(nano_seconds);
thread->wakeup_callback = wakeup_callback;
Core::System::GetInstance().PrepareReschedule();
} else {
object->Acquire(thread);
}
return RESULT_SUCCESS;
}
/// Wait for the given handles to synchronize, timeout after the specified nanoseconds
static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64 handle_count,
s64 nano_seconds) {
@@ -232,7 +202,7 @@ static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64
thread->WakeAfterDelay(nano_seconds);
thread->wakeup_callback = DefaultThreadWakeupCallback;
Core::System::GetInstance().PrepareReschedule();
Core::System::GetInstance().CpuCore(thread->processor_id).PrepareReschedule();
return RESULT_TIMEOUT;
}
@@ -395,14 +365,14 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
thread->SetPriority(priority);
Core::System::GetInstance().PrepareReschedule();
Core::System::GetInstance().CpuCore(thread->processor_id).PrepareReschedule();
return RESULT_SUCCESS;
}
/// Get which CPU core is executing the current thread
static u32 GetCurrentProcessorNumber() {
NGLOG_WARNING(Kernel_SVC, "(STUBBED) called, defaulting to processor 0");
return 0;
NGLOG_TRACE(Kernel_SVC, "called");
return GetCurrentThread()->processor_id;
}
static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size,
@@ -485,22 +455,28 @@ static void ExitProcess() {
Core::CurrentProcess()->status = ProcessStatus::Exited;
// Stop all the process threads that are currently waiting for objects.
auto& thread_list = Core::System::GetInstance().Scheduler().GetThreadList();
for (auto& thread : thread_list) {
if (thread->owner_process != Core::CurrentProcess())
continue;
auto stop_threads = [](const std::vector<SharedPtr<Thread>>& thread_list) {
for (auto& thread : thread_list) {
if (thread->owner_process != Core::CurrentProcess())
continue;
if (thread == GetCurrentThread())
continue;
if (thread == GetCurrentThread())
continue;
// TODO(Subv): When are the other running/ready threads terminated?
ASSERT_MSG(thread->status == THREADSTATUS_WAIT_SYNCH_ANY ||
thread->status == THREADSTATUS_WAIT_SYNCH_ALL,
"Exiting processes with non-waiting threads is currently unimplemented");
// TODO(Subv): When are the other running/ready threads terminated?
ASSERT_MSG(thread->status == THREADSTATUS_WAIT_SYNCH_ANY ||
thread->status == THREADSTATUS_WAIT_SYNCH_ALL,
"Exiting processes with non-waiting threads is currently unimplemented");
thread->Stop();
}
thread->Stop();
}
};
auto& system = Core::System::GetInstance();
stop_threads(system.Scheduler(0)->GetThreadList());
stop_threads(system.Scheduler(1)->GetThreadList());
stop_threads(system.Scheduler(2)->GetThreadList());
stop_threads(system.Scheduler(3)->GetThreadList());
// Kill the current thread
GetCurrentThread()->Stop();
@@ -530,14 +506,9 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
switch (processor_id) {
case THREADPROCESSORID_0:
break;
case THREADPROCESSORID_1:
case THREADPROCESSORID_2:
case THREADPROCESSORID_3:
// TODO(bunnei): Implement support for other processor IDs
NGLOG_ERROR(Kernel_SVC,
"Newly created thread must run in another thread ({}), unimplemented.",
processor_id);
break;
default:
ASSERT_MSG(false, "Unsupported thread processor ID: {}", processor_id);
@@ -551,6 +522,7 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
*out_handle = thread->guest_handle;
Core::System::GetInstance().PrepareReschedule();
Core::System::GetInstance().CpuCore(thread->processor_id).PrepareReschedule();
NGLOG_TRACE(Kernel_SVC,
"called entrypoint=0x{:08X} ({}), arg=0x{:08X}, stacktop=0x{:08X}, "
@@ -569,14 +541,17 @@ static ResultCode StartThread(Handle thread_handle) {
return ERR_INVALID_HANDLE;
}
ASSERT(thread->status == THREADSTATUS_DORMANT);
thread->ResumeFromWait();
Core::System::GetInstance().CpuCore(thread->processor_id).PrepareReschedule();
return RESULT_SUCCESS;
}
/// Called when a thread exits
static void ExitThread() {
NGLOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CPU().GetPC());
NGLOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC());
ExitCurrentThread();
Core::System::GetInstance().PrepareReschedule();
@@ -588,7 +563,7 @@ static void SleepThread(s64 nanoseconds) {
// Don't attempt to yield execution if there are no available threads to run,
// this way we avoid a useless reschedule to the idle thread.
if (nanoseconds == 0 && !Core::System::GetInstance().Scheduler().HaveReadyThreads())
if (nanoseconds == 0 && !Core::System::GetInstance().CurrentScheduler().HaveReadyThreads())
return;
// Sleep current thread and check for next thread to schedule
@@ -624,7 +599,7 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var
// Note: Deliberately don't attempt to inherit the lock owner's priority.
Core::System::GetInstance().PrepareReschedule();
Core::System::GetInstance().CpuCore(current_thread->processor_id).PrepareReschedule();
return RESULT_SUCCESS;
}
@@ -633,17 +608,43 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
NGLOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
condition_variable_addr, target);
u32 processed = 0;
auto& thread_list = Core::System::GetInstance().Scheduler().GetThreadList();
auto RetrieveWaitingThreads =
[](size_t core_index, std::vector<SharedPtr<Thread>>& waiting_threads, VAddr condvar_addr) {
const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
auto& thread_list = scheduler->GetThreadList();
for (auto& thread : thread_list) {
if (thread->condvar_wait_address != condition_variable_addr)
continue;
for (auto& thread : thread_list) {
if (thread->condvar_wait_address == condvar_addr)
waiting_threads.push_back(thread);
}
};
// Only process up to 'target' threads, unless 'target' is -1, in which case process
// them all.
if (target != -1 && processed >= target)
break;
// Retrieve a list of all threads that are waiting for this condition variable.
std::vector<SharedPtr<Thread>> waiting_threads;
RetrieveWaitingThreads(0, waiting_threads, condition_variable_addr);
RetrieveWaitingThreads(1, waiting_threads, condition_variable_addr);
RetrieveWaitingThreads(2, waiting_threads, condition_variable_addr);
RetrieveWaitingThreads(3, waiting_threads, condition_variable_addr);
// Sort them by priority, such that the highest priority ones come first.
std::sort(waiting_threads.begin(), waiting_threads.end(),
[](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
return lhs->current_priority < rhs->current_priority;
});
// Only process up to 'target' threads, unless 'target' is -1, in which case process
// them all.
size_t last = waiting_threads.size();
if (target != -1)
last = target;
// If there are no threads waiting on this condition variable, just exit
if (last > waiting_threads.size())
return RESULT_SUCCESS;
for (size_t index = 0; index < last; ++index) {
auto& thread = waiting_threads[index];
ASSERT(thread->condvar_wait_address == condition_variable_addr);
// If the mutex is not yet acquired, acquire it.
u32 mutex_val = Memory::Read32(thread->mutex_wait_address);
@@ -676,10 +677,8 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
owner->AddMutexWaiter(thread);
Core::System::GetInstance().PrepareReschedule();
Core::System::GetInstance().CpuCore(thread->processor_id).PrepareReschedule();
}
++processed;
}
return RESULT_SUCCESS;
@@ -718,16 +717,56 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32
return RESULT_SUCCESS;
}
static ResultCode GetThreadCoreMask(Handle handle, u32* mask, u64* unknown) {
NGLOG_WARNING(Kernel_SVC, "(STUBBED) called, handle=0x{:08X}", handle);
*mask = 0x0;
*unknown = 0xf;
static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) {
NGLOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);
const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
if (!thread) {
return ERR_INVALID_HANDLE;
}
*core = thread->ideal_core;
*mask = thread->affinity_mask;
return RESULT_SUCCESS;
}
static ResultCode SetThreadCoreMask(Handle handle, u32 mask, u64 unknown) {
NGLOG_WARNING(Kernel_SVC, "(STUBBED) called, handle=0x{:08X}, mask=0x{:08X}, unknown=0x{:X}",
handle, mask, unknown);
static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
NGLOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:16X}, core=0x{:X}", thread_handle,
mask, core);
const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
if (!thread) {
return ERR_INVALID_HANDLE;
}
if (core == THREADPROCESSORID_DEFAULT) {
ASSERT(thread->owner_process->ideal_processor != THREADPROCESSORID_DEFAULT);
// Set the target CPU to the one specified in the process' exheader.
core = thread->owner_process->ideal_processor;
mask = 1 << core;
}
if (mask == 0) {
return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidCombination);
}
/// This value is used to only change the affinity mask without changing the current ideal core.
static constexpr u32 OnlyChangeMask = static_cast<u32>(-3);
if (core == OnlyChangeMask) {
core = thread->ideal_core;
} else if (core >= Core::NUM_CPU_CORES && core != -1) {
return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidProcessorId);
}
// Error out if the input core isn't enabled in the input mask.
if (core < Core::NUM_CPU_CORES && (mask & (1 << core)) == 0) {
return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidCombination);
}
thread->ChangeCore(core, mask);
return RESULT_SUCCESS;
}


@@ -47,9 +47,12 @@ enum class GetInfoType : u64 {
NewMapRegionSize = 15,
// 3.0.0+
IsVirtualAddressMemoryEnabled = 16,
PersonalMmHeapUsage = 17,
TitleId = 18,
// 4.0.0+
PrivilegedProcessId = 19,
// 5.0.0+
UserExceptionContextAddr = 20,
};
void CallSVC(u32 immediate);


@@ -13,14 +13,14 @@
namespace Kernel {
#define PARAM(n) Core::CPU().GetReg(n)
#define PARAM(n) Core::CurrentArmInterface().GetReg(n)
/**
* HLE a function return from the current ARM userland process
* @param res Result to return
*/
static inline void FuncReturn(u64 res) {
Core::CPU().SetReg(0, res);
Core::CurrentArmInterface().SetReg(0, res);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -45,7 +45,7 @@ template <ResultCode func(u32*, u32)>
void SvcWrap() {
u32 param_1 = 0;
u32 retval = func(&param_1, (u32)PARAM(1)).raw;
Core::CPU().SetReg(1, param_1);
Core::CurrentArmInterface().SetReg(1, param_1);
FuncReturn(retval);
}
@@ -53,7 +53,7 @@ template <ResultCode func(u32*, u64)>
void SvcWrap() {
u32 param_1 = 0;
u32 retval = func(&param_1, PARAM(1)).raw;
Core::CPU().SetReg(1, param_1);
Core::CurrentArmInterface().SetReg(1, param_1);
FuncReturn(retval);
}
@@ -66,7 +66,7 @@ template <ResultCode func(u64*, u64)>
void SvcWrap() {
u64 param_1 = 0;
u32 retval = func(&param_1, PARAM(1)).raw;
Core::CPU().SetReg(1, param_1);
Core::CurrentArmInterface().SetReg(1, param_1);
FuncReturn(retval);
}
@@ -85,8 +85,8 @@ void SvcWrap() {
u32 param_1 = 0;
u64 param_2 = 0;
ResultCode retval = func((u32)(PARAM(2) & 0xFFFFFFFF), &param_1, &param_2);
Core::CPU().SetReg(1, param_1);
Core::CPU().SetReg(2, param_2);
Core::CurrentArmInterface().SetReg(1, param_1);
Core::CurrentArmInterface().SetReg(2, param_2);
FuncReturn(retval.raw);
}
@@ -120,7 +120,7 @@ template <ResultCode func(u32*, u64, u64, s64)>
void SvcWrap() {
u32 param_1 = 0;
ResultCode retval = func(&param_1, PARAM(1), (u32)(PARAM(2) & 0xFFFFFFFF), (s64)PARAM(3));
Core::CPU().SetReg(1, param_1);
Core::CurrentArmInterface().SetReg(1, param_1);
FuncReturn(retval.raw);
}
@@ -133,7 +133,7 @@ template <ResultCode func(u64*, u64, u64, u64)>
void SvcWrap() {
u64 param_1 = 0;
u32 retval = func(&param_1, PARAM(1), PARAM(2), PARAM(3)).raw;
Core::CPU().SetReg(1, param_1);
Core::CurrentArmInterface().SetReg(1, param_1);
FuncReturn(retval);
}
@@ -143,7 +143,7 @@ void SvcWrap() {
u32 retval =
func(&param_1, PARAM(1), PARAM(2), PARAM(3), (u32)PARAM(4), (s32)(PARAM(5) & 0xFFFFFFFF))
.raw;
Core::CPU().SetReg(1, param_1);
Core::CurrentArmInterface().SetReg(1, param_1);
FuncReturn(retval);
}
@@ -166,7 +166,7 @@ template <ResultCode func(u32*, u64, u64, u32)>
void SvcWrap() {
u32 param_1 = 0;
u32 retval = func(&param_1, PARAM(1), PARAM(2), (u32)(PARAM(3) & 0xFFFFFFFF)).raw;
Core::CPU().SetReg(1, param_1);
Core::CurrentArmInterface().SetReg(1, param_1);
FuncReturn(retval);
}
@@ -175,7 +175,7 @@ void SvcWrap() {
u32 param_1 = 0;
u32 retval =
func(&param_1, PARAM(1), (u32)(PARAM(2) & 0xFFFFFFFF), (u32)(PARAM(3) & 0xFFFFFFFF)).raw;
Core::CPU().SetReg(1, param_1);
Core::CurrentArmInterface().SetReg(1, param_1);
FuncReturn(retval);
}


@@ -64,7 +64,7 @@ void Thread::Stop() {
// Clean up thread from ready queue
// This is only needed when the thread is termintated forcefully (SVC TerminateProcess)
if (status == THREADSTATUS_READY) {
Core::System::GetInstance().Scheduler().UnscheduleThread(this, current_priority);
scheduler->UnscheduleThread(this, current_priority);
}
status = THREADSTATUS_DEAD;
@@ -92,7 +92,7 @@ void WaitCurrentThread_Sleep() {
void ExitCurrentThread() {
Thread* thread = GetCurrentThread();
thread->Stop();
Core::System::GetInstance().Scheduler().RemoveThread(thread);
Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
}
/**
@@ -133,8 +133,11 @@ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) {
auto lock_owner = thread->lock_owner;
// Threads waking up by timeout from WaitProcessWideKey do not perform priority inheritance
// and don't have a lock owner.
ASSERT(lock_owner == nullptr);
// and don't have a lock owner unless SignalProcessWideKey was called first and the thread
// wasn't awakened due to the mutex already being acquired.
if (lock_owner) {
lock_owner->RemoveMutexWaiter(thread);
}
}
if (resume)
@@ -154,6 +157,18 @@ void Thread::CancelWakeupTimer() {
CoreTiming::UnscheduleEvent(ThreadWakeupEventType, callback_handle);
}
static boost::optional<s32> GetNextProcessorId(u64 mask) {
for (s32 index = 0; index < Core::NUM_CPU_CORES; ++index) {
if (mask & (1ULL << index)) {
if (!Core::System().GetInstance().Scheduler(index)->GetCurrentThread()) {
// Core is enabled and not running any threads, use this one
return index;
}
}
}
return {};
}
void Thread::ResumeFromWait() {
ASSERT_MSG(wait_objects.empty(), "Thread is waking up while waiting for objects");
@@ -188,8 +203,37 @@ void Thread::ResumeFromWait() {
wakeup_callback = nullptr;
status = THREADSTATUS_READY;
Core::System::GetInstance().Scheduler().ScheduleThread(this, current_priority);
Core::System::GetInstance().PrepareReschedule();
boost::optional<s32> new_processor_id = GetNextProcessorId(affinity_mask);
if (!new_processor_id) {
new_processor_id = processor_id;
}
if (ideal_core != -1 &&
Core::System().GetInstance().Scheduler(ideal_core)->GetCurrentThread() == nullptr) {
new_processor_id = ideal_core;
}
ASSERT(*new_processor_id < 4);
// Add thread to new core's scheduler
auto& next_scheduler = Core::System().GetInstance().Scheduler(*new_processor_id);
if (*new_processor_id != processor_id) {
// Remove thread from previous core's scheduler
scheduler->RemoveThread(this);
next_scheduler->AddThread(this, current_priority);
}
processor_id = *new_processor_id;
// If the thread was ready, unschedule from the previous core and schedule on the new core
scheduler->UnscheduleThread(this, current_priority);
next_scheduler->ScheduleThread(this, current_priority);
// Change thread's scheduler
scheduler = next_scheduler;
Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule();
}
/**
@@ -259,8 +303,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
SharedPtr<Thread> thread(new Thread);
Core::System::GetInstance().Scheduler().AddThread(thread, priority);
thread->thread_id = NewThreadId();
thread->status = THREADSTATUS_DORMANT;
thread->entry_point = entry_point;
@@ -268,6 +310,8 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
thread->nominal_priority = thread->current_priority = priority;
thread->last_running_ticks = CoreTiming::GetTicks();
thread->processor_id = processor_id;
thread->ideal_core = processor_id;
thread->affinity_mask = 1ULL << processor_id;
thread->wait_objects.clear();
thread->mutex_wait_address = 0;
thread->condvar_wait_address = 0;
@@ -275,6 +319,8 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
thread->name = std::move(name);
thread->callback_handle = wakeup_callback_handle_table.Create(thread).Unwrap();
thread->owner_process = owner_process;
thread->scheduler = Core::System().GetInstance().Scheduler(processor_id);
thread->scheduler->AddThread(thread, priority);
// Find the next available TLS index, and mark it as used
auto& tls_slots = owner_process->tls_slots;
@@ -337,7 +383,7 @@ void Thread::SetPriority(u32 priority) {
}
void Thread::BoostPriority(u32 priority) {
Core::System::GetInstance().Scheduler().SetThreadPriority(this, priority);
scheduler->SetThreadPriority(this, priority);
current_priority = priority;
}
@@ -406,7 +452,7 @@ void Thread::UpdatePriority() {
if (new_priority == current_priority)
return;
Core::System::GetInstance().Scheduler().SetThreadPriority(this, new_priority);
scheduler->SetThreadPriority(this, new_priority);
current_priority = new_priority;
@@ -415,13 +461,54 @@ void Thread::UpdatePriority() {
lock_owner->UpdatePriority();
}
void Thread::ChangeCore(u32 core, u64 mask) {
ideal_core = core;
affinity_mask = mask;
if (status != THREADSTATUS_READY) {
return;
}
boost::optional<s32> new_processor_id{GetNextProcessorId(affinity_mask)};
if (!new_processor_id) {
new_processor_id = processor_id;
}
if (ideal_core != -1 &&
Core::System().GetInstance().Scheduler(ideal_core)->GetCurrentThread() == nullptr) {
new_processor_id = ideal_core;
}
ASSERT(*new_processor_id < 4);
// Add thread to new core's scheduler
auto& next_scheduler = Core::System().GetInstance().Scheduler(*new_processor_id);
if (*new_processor_id != processor_id) {
// Remove thread from previous core's scheduler
scheduler->RemoveThread(this);
next_scheduler->AddThread(this, current_priority);
}
processor_id = *new_processor_id;
// If the thread was ready, unschedule from the previous core and schedule on the new core
scheduler->UnscheduleThread(this, current_priority);
next_scheduler->ScheduleThread(this, current_priority);
// Change thread's scheduler
scheduler = next_scheduler;
Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Gets the current thread
*/
Thread* GetCurrentThread() {
return Core::System::GetInstance().Scheduler().GetCurrentThread();
return Core::System::GetInstance().CurrentScheduler().GetCurrentThread();
}
void ThreadingInit() {


@@ -4,6 +4,7 @@
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
@@ -56,6 +57,7 @@ enum class ThreadWakeupReason {
namespace Kernel {
class Process;
class Scheduler;
class Thread final : public WaitObject {
public:
@@ -118,6 +120,9 @@ public:
/// Recalculates the current priority taking into account priority inheritance.
void UpdatePriority();
/// Changes the core that the thread is running or scheduled to run on.
void ChangeCore(u32 core, u64 mask);
/**
* Gets the thread's thread ID
* @return The thread's ID
@@ -240,6 +245,11 @@ public:
// available. In case of a timeout, the object will be nullptr.
std::function<WakeupCallback> wakeup_callback;
std::shared_ptr<Scheduler> scheduler;
u32 ideal_core{0xFFFFFFFF};
u64 affinity_mask{0x1};
private:
Thread();
~Thread() override;

View File

@@ -104,8 +104,15 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
VirtualMemoryArea& final_vma = vma_handle->second;
ASSERT(final_vma.size == size);
Core::CPU().MapBackingMemory(target, size, block->data() + offset,
VMAPermission::ReadWriteExecute);
auto& system = Core::System::GetInstance();
system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset,
VMAPermission::ReadWriteExecute);
system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset,
VMAPermission::ReadWriteExecute);
system.ArmInterface(2).MapBackingMemory(target, size, block->data() + offset,
VMAPermission::ReadWriteExecute);
system.ArmInterface(3).MapBackingMemory(target, size, block->data() + offset,
VMAPermission::ReadWriteExecute);
final_vma.type = VMAType::AllocatedMemoryBlock;
final_vma.permissions = VMAPermission::ReadWrite;
@@ -126,7 +133,11 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
VirtualMemoryArea& final_vma = vma_handle->second;
ASSERT(final_vma.size == size);
Core::CPU().MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
auto& system = Core::System::GetInstance();
system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
system.ArmInterface(3).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
final_vma.type = VMAType::BackingMemory;
final_vma.permissions = VMAPermission::ReadWrite;
@@ -184,7 +195,11 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {
ASSERT(FindVMA(target)->second.size >= size);
Core::CPU().UnmapMemory(target, size);
auto& system = Core::System::GetInstance();
system.ArmInterface(0).UnmapMemory(target, size);
system.ArmInterface(1).UnmapMemory(target, size);
system.ArmInterface(2).UnmapMemory(target, size);
system.ArmInterface(3).UnmapMemory(target, size);
return RESULT_SUCCESS;
}
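The hunks above repeat the same MapBackingMemory/UnmapMemory call once per ARM interface because every emulated core keeps its own view of the guest page tables. The same idea can be expressed as a loop over the core index; the sketch below only illustrates that pattern with an invented stand-in type, and the fixed core count of 4 is carried over from the diff.

#include <array>
#include <cstdint>

// Invented stand-in for the per-core CPU interface used in the diff.
struct CoreInterfaceSketch {
    void MapBackingMemory(std::uint64_t target, std::uint64_t size, std::uint8_t* memory) {
        // A real implementation would update this core's page-table state here.
    }
};

// Apply the same mapping to every core instead of spelling the call out four times.
void MapOnAllCores(std::array<CoreInterfaceSketch, 4>& cores, std::uint64_t target,
                   std::uint64_t size, std::uint8_t* memory) {
    for (auto& core : cores) {
        core.MapBackingMemory(target, size, memory);
    }
}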

View File

@@ -32,7 +32,8 @@ enum class ErrorModule : u32 {
Common = 0,
Kernel = 1,
FS = 2,
NvidiaTransferMemory = 3,
OS = 3, // used for Memory, Thread, Mutex, Nvidia
HTCS = 4,
NCM = 5,
DD = 6,
LR = 8,
@@ -42,41 +43,80 @@ enum class ErrorModule : u32 {
PM = 15,
NS = 16,
HTC = 18,
NCMContent = 20,
SM = 21,
RO = 22,
SDMMC = 24,
OVLN = 25,
SPL = 26,
ETHC = 100,
I2C = 101,
GPIO = 102,
UART = 103,
Settings = 105,
WLAN = 107,
XCD = 108,
NIFM = 110,
Display = 114,
NTC = 116,
Hwopus = 111,
Bluetooth = 113,
VI = 114,
NFP = 115,
Time = 116,
FGM = 117,
PCIE = 120,
OE = 118,
PCIe = 120,
Friends = 121,
BCAT = 122,
SSL = 123,
Account = 124,
News = 125,
Mii = 126,
NFC = 127,
AM = 128,
PlayReport = 129,
AHID = 130,
Qlaunch = 132,
PCV = 133,
OMM = 134,
BPC = 135,
PSM = 136,
NIM = 137,
PSC = 138,
TC = 139,
USB = 140,
NSD = 141,
PCTL = 142,
BTM = 143,
ETicket = 145,
NGC = 146,
ERPT = 147,
APM = 148,
Profiler = 150,
ErrorUpload = 151,
Audio = 153,
NPNS = 154,
NPNSHTTPSTREAM = 155,
ARP = 157,
BOOT = 158,
NFC = 161,
SWKBD = 158,
BOOT = 159,
NFCMifare = 161,
UserlandAssert = 162,
Fatal = 163,
NIMShop = 164,
SPSM = 165,
BGTC = 167,
UserlandCrash = 168,
HID = 203,
SREPO = 180,
Dauth = 181,
HID = 202,
LDN = 203,
Irsensor = 205,
Capture = 206,
TC = 651,
Manu = 208,
ATK = 209,
GRC = 212,
Migration = 216,
MigrationLdcServ = 217,
GeneralWebApplet = 800,
WifiWebAuthApplet = 809,
WhitelistedApplet = 810,
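These module numbers form the middle field of a Switch result code. Under the commonly assumed encoding (module in the low 9 bits, description in the following 13 bits, and the on-screen form being 2000 + module followed by the description), a hypothetical error from the Time module (116) with description 1 would display as 2116-0001. The sketch below just packs and formats such a value; it is not the project's ResultCode class.

#include <cstdint>
#include <cstdio>

// Assumed Switch result layout: bits 0-8 = module, bits 9-21 = description.
constexpr std::uint32_t MakeResult(std::uint32_t module, std::uint32_t description) {
    return (module & 0x1FF) | ((description & 0x1FFF) << 9);
}

int main() {
    const std::uint32_t raw = MakeResult(116, 1); // Time module, description 1
    const std::uint32_t module = raw & 0x1FF;
    const std::uint32_t description = (raw >> 9) & 0x1FFF;
    std::printf("%04u-%04u\n", 2000 + module, description); // prints 2116-0001
    return 0;
}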

View File

@@ -74,7 +74,40 @@ void IAudioController::GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestCo
rb.Push(volume);
}
IDisplayController::IDisplayController() : ServiceFramework("IDisplayController") {}
IDisplayController::IDisplayController() : ServiceFramework("IDisplayController") {
static const FunctionInfo functions[] = {
{0, nullptr, "GetLastForegroundCaptureImage"},
{1, nullptr, "UpdateLastForegroundCaptureImage"},
{2, nullptr, "GetLastApplicationCaptureImage"},
{3, nullptr, "GetCallerAppletCaptureImage"},
{4, nullptr, "UpdateCallerAppletCaptureImage"},
{5, nullptr, "GetLastForegroundCaptureImageEx"},
{6, nullptr, "GetLastApplicationCaptureImageEx"},
{7, nullptr, "GetCallerAppletCaptureImageEx"},
{8, nullptr, "TakeScreenShotOfOwnLayer"}, // 2.0.0+
{9, nullptr, "CopyBetweenCaptureBuffers"}, // 5.0.0+
{10, nullptr, "AcquireLastApplicationCaptureBuffer"},
{11, nullptr, "ReleaseLastApplicationCaptureBuffer"},
{12, nullptr, "AcquireLastForegroundCaptureBuffer"},
{13, nullptr, "ReleaseLastForegroundCaptureBuffer"},
{14, nullptr, "AcquireCallerAppletCaptureBuffer"},
{15, nullptr, "ReleaseCallerAppletCaptureBuffer"},
{16, nullptr, "AcquireLastApplicationCaptureBufferEx"},
{17, nullptr, "AcquireLastForegroundCaptureBufferEx"},
{18, nullptr, "AcquireCallerAppletCaptureBufferEx"},
// 2.0.0+
{20, nullptr, "ClearCaptureBuffer"},
{21, nullptr, "ClearAppletTransitionBuffer"},
// 4.0.0+
{22, nullptr, "AcquireLastApplicationCaptureSharedBuffer"},
{23, nullptr, "ReleaseLastApplicationCaptureSharedBuffer"},
{24, nullptr, "AcquireLastForegroundCaptureSharedBuffer"},
{25, nullptr, "ReleaseLastForegroundCaptureSharedBuffer"},
{26, nullptr, "AcquireCallerAppletCaptureSharedBuffer"},
{27, nullptr, "ReleaseCallerAppletCaptureSharedBuffer"},
};
RegisterHandlers(functions);
}
IDebugFunctions::IDebugFunctions() : ServiceFramework("IDebugFunctions") {}
@@ -457,7 +490,7 @@ IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationF
{20, &IApplicationFunctions::EnsureSaveData, "EnsureSaveData"},
{21, &IApplicationFunctions::GetDesiredLanguage, "GetDesiredLanguage"},
{22, &IApplicationFunctions::SetTerminateResult, "SetTerminateResult"},
{23, nullptr, "GetDisplayVersion"},
{23, &IApplicationFunctions::GetDisplayVersion, "GetDisplayVersion"},
{24, nullptr, "GetLaunchStorageInfoForDebug"},
{25, nullptr, "ExtendSaveData"},
{26, nullptr, "GetSaveDataSize"},
@@ -552,6 +585,14 @@ void IApplicationFunctions::SetTerminateResult(Kernel::HLERequestContext& ctx) {
NGLOG_WARNING(Service_AM, "(STUBBED) called, result=0x{:08X}", result);
}
void IApplicationFunctions::GetDisplayVersion(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 6};
rb.Push(RESULT_SUCCESS);
rb.Push<u64>(1);
rb.Push<u64>(0);
NGLOG_WARNING(Service_AM, "(STUBBED) called");
}
void IApplicationFunctions::GetDesiredLanguage(Kernel::HLERequestContext& ctx) {
// TODO(bunnei): This should be configurable
IPC::ResponseBuilder rb{ctx, 4};

View File

@@ -18,10 +18,25 @@ class NVFlinger;
namespace AM {
// TODO: Add more languages
enum SystemLanguage {
Japanese = 0,
English = 1,
English = 1, // en-US
French = 2,
German = 3,
Italian = 4,
Spanish = 5,
Chinese = 6,
Korean = 7,
Dutch = 8,
Portuguese = 9,
Russian = 10,
Taiwanese = 11,
BritishEnglish = 12, // en-GB
CanadianFrench = 13,
LatinAmericanSpanish = 14, // es-419
// 4.0.0+
SimplifiedChinese = 15,
TraditionalChinese = 16,
};
class IWindowController final : public ServiceFramework<IWindowController> {
@@ -117,6 +132,7 @@ private:
void CreateApplicationAndRequestToStartForQuest(Kernel::HLERequestContext& ctx);
void EnsureSaveData(Kernel::HLERequestContext& ctx);
void SetTerminateResult(Kernel::HLERequestContext& ctx);
void GetDisplayVersion(Kernel::HLERequestContext& ctx);
void GetDesiredLanguage(Kernel::HLERequestContext& ctx);
void InitializeGamePlayRecording(Kernel::HLERequestContext& ctx);
void SetGamePlayRecordingState(Kernel::HLERequestContext& ctx);

View File

@@ -98,7 +98,7 @@ void AppletOE::OpenApplicationProxy(Kernel::HLERequestContext& ctx) {
AppletOE::AppletOE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)
: ServiceFramework("appletOE"), nvflinger(std::move(nvflinger)) {
static const FunctionInfo functions[] = {
{0x00000000, &AppletOE::OpenApplicationProxy, "OpenApplicationProxy"},
{0, &AppletOE::OpenApplicationProxy, "OpenApplicationProxy"},
};
RegisterHandlers(functions);
}

View File

@@ -35,10 +35,8 @@ public:
AudInU::AudInU() : ServiceFramework("audin:u") {
static const FunctionInfo functions[] = {
{0, nullptr, "ListAudioIns"},
{1, nullptr, "OpenAudioIn"},
{3, nullptr, "OpenAudioInAuto"},
{4, nullptr, "ListAudioInsAuto"},
{0, nullptr, "ListAudioIns"}, {1, nullptr, "OpenAudioIn"}, {2, nullptr, "Unknown"},
{3, nullptr, "OpenAudioInAuto"}, {4, nullptr, "ListAudioInsAuto"},
};
RegisterHandlers(functions);
}

View File

@@ -196,10 +196,10 @@ void AudOutU::OpenAudioOut(Kernel::HLERequestContext& ctx) {
}
AudOutU::AudOutU() : ServiceFramework("audout:u") {
static const FunctionInfo functions[] = {{0x00000000, &AudOutU::ListAudioOuts, "ListAudioOuts"},
{0x00000001, &AudOutU::OpenAudioOut, "OpenAudioOut"},
{0x00000002, nullptr, "ListAudioOutsAuto"},
{0x00000003, nullptr, "OpenAudioOutAuto"}};
static const FunctionInfo functions[] = {{0, &AudOutU::ListAudioOuts, "ListAudioOuts"},
{1, &AudOutU::OpenAudioOut, "OpenAudioOut"},
{2, nullptr, "ListAudioOutsAuto"},
{3, nullptr, "OpenAudioOutAuto"}};
RegisterHandlers(functions);
}

View File

@@ -20,6 +20,7 @@ public:
{4, nullptr, "RegisterBufferEvent"},
{5, nullptr, "GetReleasedFinalOutputRecorderBuffer"},
{6, nullptr, "ContainsFinalOutputRecorderBuffer"},
{7, nullptr, "Unknown"},
{8, nullptr, "AppendFinalOutputRecorderBufferAuto"},
{9, nullptr, "GetReleasedFinalOutputRecorderBufferAuto"},
};
@@ -30,7 +31,7 @@ public:
AudRecU::AudRecU() : ServiceFramework("audrec:u") {
static const FunctionInfo functions[] = {
{0x00000000, nullptr, "OpenFinalOutputRecorder"},
{0, nullptr, "OpenFinalOutputRecorder"},
};
RegisterHandlers(functions);
}

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/alignment.h"
#include "common/logging/log.h"
#include "core/core_timing.h"
#include "core/hle/ipc_helpers.h"
@@ -156,19 +157,20 @@ class IAudioDevice final : public ServiceFramework<IAudioDevice> {
public:
IAudioDevice() : ServiceFramework("IAudioDevice") {
static const FunctionInfo functions[] = {
{0x0, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceName"},
{0x1, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolume"},
{0x2, nullptr, "GetAudioDeviceOutputVolume"},
{0x3, &IAudioDevice::GetActiveAudioDeviceName, "GetActiveAudioDeviceName"},
{0x4, &IAudioDevice::QueryAudioDeviceSystemEvent, "QueryAudioDeviceSystemEvent"},
{0x5, &IAudioDevice::GetActiveChannelCount, "GetActiveChannelCount"},
{0x6, &IAudioDevice::ListAudioDeviceName,
{0, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceName"},
{1, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolume"},
{2, nullptr, "GetAudioDeviceOutputVolume"},
{3, &IAudioDevice::GetActiveAudioDeviceName, "GetActiveAudioDeviceName"},
{4, &IAudioDevice::QueryAudioDeviceSystemEvent, "QueryAudioDeviceSystemEvent"},
{5, &IAudioDevice::GetActiveChannelCount, "GetActiveChannelCount"},
{6, &IAudioDevice::ListAudioDeviceName,
"ListAudioDeviceNameAuto"}, // TODO(ogniK): Confirm if autos are identical to non auto
{0x7, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolumeAuto"},
{0x8, nullptr, "GetAudioDeviceOutputVolumeAuto"},
{0xa, &IAudioDevice::GetActiveAudioDeviceName, "GetActiveAudioDeviceNameAuto"},
{0xb, nullptr, "QueryAudioDeviceInputEvent"},
{0xc, nullptr, "QueryAudioDeviceOutputEvent"}};
{7, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolumeAuto"},
{8, nullptr, "GetAudioDeviceOutputVolumeAuto"},
{10, &IAudioDevice::GetActiveAudioDeviceName, "GetActiveAudioDeviceNameAuto"},
{11, nullptr, "QueryAudioDeviceInputEvent"},
{12, nullptr, "QueryAudioDeviceOutputEvent"},
};
RegisterHandlers(functions);
buffer_event =
@@ -255,12 +257,62 @@ void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
}
void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
auto params = rp.PopRaw<WorkerBufferParameters>();
u64 buffer_sz = Common::AlignUp(4 * params.unknown8, 0x40);
buffer_sz += params.unknownC * 1024;
buffer_sz += 0x940 * (params.unknownC + 1);
buffer_sz += 0x3F0 * params.voice_count;
buffer_sz += Common::AlignUp(8 * (params.unknownC + 1), 0x10);
buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
buffer_sz +=
Common::AlignUp((0x3C0 * (params.sink_count + params.unknownC) + 4 * params.sample_count) *
(params.unknown8 + 6),
0x40);
if (IsFeatureSupported(AudioFeatures::Splitter, params.magic)) {
u32 count = params.unknownC + 1;
u64 node_count = Common::AlignUp(count, 0x40);
u64 node_state_buffer_sz =
4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
u64 edge_matrix_buffer_sz = 0;
node_count = Common::AlignUp(count * count, 0x40);
if (node_count >> 31 != 0) {
edge_matrix_buffer_sz = (node_count | 7) / 8;
} else {
edge_matrix_buffer_sz = node_count / 8;
}
buffer_sz += Common::AlignUp(node_state_buffer_sz + edge_matrix_buffer_sz, 0x10);
}
buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
if (IsFeatureSupported(AudioFeatures::Splitter, params.magic)) {
buffer_sz += 0xE0 * params.unknown2c;
buffer_sz += 0x20 * params.splitter_count;
buffer_sz += Common::AlignUp(4 * params.unknown2c, 0x10);
}
buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
((params.voice_count * 256) | 0x40);
if (params.unknown1c >= 1) {
output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
16 * params.voice_count + 16) +
0x658) *
(params.unknown1c + 1) +
0xc0,
0x40) +
output_sz;
}
output_sz = Common::AlignUp(output_sz + 0x1807e, 0x1000);
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push<u64>(0x4000);
rb.Push<u64>(output_sz);
NGLOG_WARNING(Service_Audio, "(STUBBED) called");
NGLOG_DEBUG(Service_Audio, "called, buffer_size=0x{:X}", output_sz);
}
void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) {
@@ -272,4 +324,14 @@ void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) {
NGLOG_DEBUG(Service_Audio, "called");
}
bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
switch (feature) {
case AudioFeatures::Splitter:
return version_num >= 2;
default:
return false;
}
}
} // namespace Service::Audio
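IsFeatureSupported turns the ASCII revision magic into a small integer by subtracting MakeMagic('R', 'E', 'V', '0') and byte-swapping the difference. A standalone walk-through of that arithmetic, assuming MakeMagic packs its characters little-endian (lowest byte first), which is how a "REVn" string read from guest memory appears as a u32; the explicit ByteSwap32 here stands in for what the u32_be wrapper type does.

#include <cstdint>
#include <cstdio>

// Little-endian character packing, as assumed for Common::MakeMagic.
constexpr std::uint32_t MakeMagic(char a, char b, char c, char d) {
    return static_cast<std::uint32_t>(a) | (static_cast<std::uint32_t>(b) << 8) |
           (static_cast<std::uint32_t>(c) << 16) | (static_cast<std::uint32_t>(d) << 24);
}

constexpr std::uint32_t ByteSwap32(std::uint32_t v) {
    return (v >> 24) | ((v >> 8) & 0x0000FF00u) | ((v << 8) & 0x00FF0000u) | (v << 24);
}

int main() {
    const std::uint32_t revision = MakeMagic('R', 'E', 'V', '4');        // "REV4" from the guest
    const std::uint32_t diff = revision - MakeMagic('R', 'E', 'V', '0'); // 0x04000000
    const std::uint32_t version_num = ByteSwap32(diff);                  // 4
    std::printf("version_num=%u, splitter supported=%d\n", version_num, version_num >= 2);
    return 0;
}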

View File

@@ -21,6 +21,31 @@ private:
void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
void GetAudioDevice(Kernel::HLERequestContext& ctx);
struct WorkerBufferParameters {
u32_le sample_rate;
u32_le sample_count;
u32_le unknown8;
u32_le unknownC;
u32_le voice_count;
u32_le sink_count;
u32_le effect_count;
u32_le unknown1c;
u8 unknown20;
u8 padding1[3];
u32_le splitter_count;
u32_le unknown2c;
u8 padding2[4];
u32_le magic;
};
static_assert(sizeof(WorkerBufferParameters) == 52,
"WorkerBufferParameters is an invalid size");
enum class AudioFeatures : u32 {
Splitter,
};
bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const;
};
} // namespace Service::Audio

View File

@@ -11,19 +11,19 @@ namespace Service::Audio {
CodecCtl::CodecCtl() : ServiceFramework("codecctl") {
static const FunctionInfo functions[] = {
{0x00000000, nullptr, "InitializeCodecController"},
{0x00000001, nullptr, "FinalizeCodecController"},
{0x00000002, nullptr, "SleepCodecController"},
{0x00000003, nullptr, "WakeCodecController"},
{0x00000004, nullptr, "SetCodecVolume"},
{0x00000005, nullptr, "GetCodecVolumeMax"},
{0x00000006, nullptr, "GetCodecVolumeMin"},
{0x00000007, nullptr, "SetCodecActiveTarget"},
{0x00000008, nullptr, "Unknown"},
{0x00000009, nullptr, "BindCodecHeadphoneMicJackInterrupt"},
{0x00000010, nullptr, "IsCodecHeadphoneMicJackInserted"},
{0x00000011, nullptr, "ClearCodecHeadphoneMicJackInterrupt"},
{0x00000012, nullptr, "IsCodecDeviceRequested"},
{0, nullptr, "InitializeCodecController"},
{1, nullptr, "FinalizeCodecController"},
{2, nullptr, "SleepCodecController"},
{3, nullptr, "WakeCodecController"},
{4, nullptr, "SetCodecVolume"},
{5, nullptr, "GetCodecVolumeMax"},
{6, nullptr, "GetCodecVolumeMin"},
{7, nullptr, "SetCodecActiveTarget"},
{8, nullptr, "GetCodecActiveTarget"},
{9, nullptr, "BindCodecHeadphoneMicJackInterrupt"},
{10, nullptr, "IsCodecHeadphoneMicJackInserted"},
{11, nullptr, "ClearCodecHeadphoneMicJackInterrupt"},
{12, nullptr, "IsCodecDeviceRequested"},
};
RegisterHandlers(functions);
}

View File

@@ -0,0 +1,16 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/hle/service/bcat/bcat.h"
namespace Service::BCAT {
BCAT::BCAT(std::shared_ptr<Module> module, const char* name)
: Module::Interface(std::move(module), name) {
static const FunctionInfo functions[] = {
{0, &BCAT::CreateBcatService, "CreateBcatService"},
};
RegisterHandlers(functions);
}
} // namespace Service::BCAT

View File

@@ -0,0 +1,16 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "core/hle/service/bcat/module.h"
namespace Service::BCAT {
class BCAT final : public Module::Interface {
public:
explicit BCAT(std::shared_ptr<Module> module, const char* name);
};
} // namespace Service::BCAT

View File

@@ -0,0 +1,53 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/service/bcat/bcat.h"
#include "core/hle/service/bcat/module.h"
namespace Service::BCAT {
class IBcatService final : public ServiceFramework<IBcatService> {
public:
IBcatService() : ServiceFramework("IBcatService") {
static const FunctionInfo functions[] = {
{10100, nullptr, "RequestSyncDeliveryCache"},
{10101, nullptr, "RequestSyncDeliveryCacheWithDirectoryName"},
{10200, nullptr, "CancelSyncDeliveryCacheRequest"},
{20100, nullptr, "RequestSyncDeliveryCacheWithApplicationId"},
{20101, nullptr, "RequestSyncDeliveryCacheWithApplicationIdAndDirectoryName"},
{30100, nullptr, "SetPassphrase"},
{30200, nullptr, "RegisterBackgroundDeliveryTask"},
{30201, nullptr, "UnregisterBackgroundDeliveryTask"},
{30202, nullptr, "BlockDeliveryTask"},
{30203, nullptr, "UnblockDeliveryTask"},
{90100, nullptr, "EnumerateBackgroundDeliveryTask"},
{90200, nullptr, "GetDeliveryList"},
{90201, nullptr, "ClearDeliveryCacheStorage"},
{90300, nullptr, "GetPushNotificationLog"},
};
RegisterHandlers(functions);
}
};
void Module::Interface::CreateBcatService(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<IBcatService>();
NGLOG_DEBUG(Service_BCAT, "called");
}
Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
: ServiceFramework(name), module(std::move(module)) {}
void InstallInterfaces(SM::ServiceManager& service_manager) {
auto module = std::make_shared<Module>();
std::make_shared<BCAT>(module, "bcat:a")->InstallAsService(service_manager);
std::make_shared<BCAT>(module, "bcat:m")->InstallAsService(service_manager);
std::make_shared<BCAT>(module, "bcat:u")->InstallAsService(service_manager);
std::make_shared<BCAT>(module, "bcat:s")->InstallAsService(service_manager);
}
} // namespace Service::BCAT

View File

@@ -0,0 +1,27 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "core/hle/service/service.h"
namespace Service::BCAT {
class Module final {
public:
class Interface : public ServiceFramework<Interface> {
public:
Interface(std::shared_ptr<Module> module, const char* name);
void CreateBcatService(Kernel::HLERequestContext& ctx);
protected:
std::shared_ptr<Module> module;
};
};
/// Registers all BCAT services with the specified service manager.
void InstallInterfaces(SM::ServiceManager& service_manager);
} // namespace Service::BCAT

View File

@@ -13,7 +13,7 @@ namespace Service::Fatal {
Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
: ServiceFramework(name), module(std::move(module)) {}
void Module::Interface::FatalSimple(Kernel::HLERequestContext& ctx) {
void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx);
u32 error_code = rp.Pop<u32>();
NGLOG_WARNING(Service_Fatal, "(STUBBED) called, error_code=0x{:X}", error_code);
@@ -21,7 +21,7 @@ void Module::Interface::FatalSimple(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
void Module::Interface::TransitionToFatalError(Kernel::HLERequestContext& ctx) {
void Module::Interface::ThrowFatalWithCpuContext(Kernel::HLERequestContext& ctx) {
NGLOG_WARNING(Service_Fatal, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);

View File

@@ -14,8 +14,8 @@ public:
public:
Interface(std::shared_ptr<Module> module, const char* name);
void FatalSimple(Kernel::HLERequestContext& ctx);
void TransitionToFatalError(Kernel::HLERequestContext& ctx);
void ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx);
void ThrowFatalWithCpuContext(Kernel::HLERequestContext& ctx);
protected:
std::shared_ptr<Module> module;

View File

@@ -8,8 +8,9 @@ namespace Service::Fatal {
Fatal_U::Fatal_U(std::shared_ptr<Module> module) : Module::Interface(std::move(module), "fatal:u") {
static const FunctionInfo functions[] = {
{1, &Fatal_U::FatalSimple, "FatalSimple"},
{2, &Fatal_U::TransitionToFatalError, "TransitionToFatalError"},
{0, nullptr, "ThrowFatal"},
{1, &Fatal_U::ThrowFatalWithPolicy, "ThrowFatalWithPolicy"},
{2, &Fatal_U::ThrowFatalWithCpuContext, "ThrowFatalWithCpuContext"},
};
RegisterHandlers(functions);
}

View File

@@ -329,6 +329,7 @@ public:
{130, nullptr, "SwapNpadAssignment"},
{131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"},
{132, nullptr, "EnableUnintendedHomeButtonInputProtection"},
{133, nullptr, "SetNpadJoyAssignmentModeSingleWithDestination"},
{200, &Hid::GetVibrationDeviceInfo, "GetVibrationDeviceInfo"},
{201, &Hid::SendVibrationValue, "SendVibrationValue"},
{202, &Hid::GetActualVibrationValue, "GetActualVibrationValue"},
@@ -336,12 +337,41 @@ public:
{204, nullptr, "PermitVibration"},
{205, nullptr, "IsVibrationPermitted"},
{206, &Hid::SendVibrationValues, "SendVibrationValues"},
{207, nullptr, "SendVibrationGcErmCommand"},
{208, nullptr, "GetActualVibrationGcErmCommand"},
{209, nullptr, "BeginPermitVibrationSession"},
{210, nullptr, "EndPermitVibrationSession"},
{300, nullptr, "ActivateConsoleSixAxisSensor"},
{301, nullptr, "StartConsoleSixAxisSensor"},
{302, nullptr, "StopConsoleSixAxisSensor"},
{303, nullptr, "ActivateSevenSixAxisSensor"},
{304, nullptr, "StartSevenSixAxisSensor"},
{305, nullptr, "StopSevenSixAxisSensor"},
{306, nullptr, "InitializeSevenSixAxisSensor"},
{307, nullptr, "FinalizeSevenSixAxisSensor"},
{308, nullptr, "SetSevenSixAxisSensorFusionStrength"},
{309, nullptr, "GetSevenSixAxisSensorFusionStrength"},
{400, nullptr, "IsUsbFullKeyControllerEnabled"},
{401, nullptr, "EnableUsbFullKeyController"},
{402, nullptr, "IsUsbFullKeyControllerConnected"},
{403, nullptr, "HasBattery"},
{404, nullptr, "HasLeftRightBattery"},
{405, nullptr, "GetNpadInterfaceType"},
{406, nullptr, "GetNpadLeftRightInterfaceType"},
{500, nullptr, "GetPalmaConnectionHandle"},
{501, nullptr, "InitializePalma"},
{502, nullptr, "AcquirePalmaOperationCompleteEvent"},
{503, nullptr, "GetPalmaOperationInfo"},
{504, nullptr, "PlayPalmaActivity"},
{505, nullptr, "SetPalmaFrModeType"},
{506, nullptr, "ReadPalmaStep"},
{507, nullptr, "EnablePalmaStep"},
{508, nullptr, "SuspendPalmaStep"},
{509, nullptr, "ResetPalmaStep"},
{510, nullptr, "ReadPalmaApplicationSection"},
{511, nullptr, "WritePalmaApplicationSection"},
{512, nullptr, "ReadPalmaUniqueCode"},
{513, nullptr, "SetPalmaUniqueCodeInvalid"},
{1000, nullptr, "SetNpadCommunicationMode"},
{1001, nullptr, "GetNpadCommunicationMode"},
};

View File

@@ -12,7 +12,7 @@ namespace Service::HID {
// Begin enums and output structs
constexpr u32 HID_NUM_ENTRIES = 17;
constexpr u32 HID_NUM_LAYOUTS = 7;
constexpr u32 HID_NUM_LAYOUTS = 2;
constexpr s32 HID_JOYSTICK_MAX = 0x8000;
constexpr s32 HID_JOYSTICK_MIN = -0x8000;

View File

@@ -17,30 +17,30 @@ public:
IUser() : ServiceFramework("IUser") {
static const FunctionInfo functions[] = {
{0, &IUser::Initialize, "Initialize"},
{1, nullptr, "Unknown1"},
{2, nullptr, "Unknown2"},
{3, nullptr, "Unknown3"},
{4, nullptr, "Unknown4"},
{5, nullptr, "Unknown5"},
{6, nullptr, "Unknown6"},
{7, nullptr, "Unknown7"},
{8, nullptr, "Unknown8"},
{9, nullptr, "Unknown9"},
{10, nullptr, "Unknown10"},
{11, nullptr, "Unknown11"},
{12, nullptr, "Unknown12"},
{13, nullptr, "Unknown13"},
{14, nullptr, "Unknown14"},
{15, nullptr, "Unknown15"},
{16, nullptr, "Unknown16"},
{17, nullptr, "Unknown17"},
{18, nullptr, "Unknown18"},
{19, nullptr, "Unknown19"},
{20, nullptr, "Unknown20"},
{21, nullptr, "Unknown21"},
{22, nullptr, "Unknown22"},
{23, nullptr, "Unknown23"},
{24, nullptr, "Unknown24"},
{1, nullptr, "Finalize"},
{2, nullptr, "ListDevices"},
{3, nullptr, "StartDetection"},
{4, nullptr, "StopDetection"},
{5, nullptr, "Mount"},
{6, nullptr, "Unmount"},
{7, nullptr, "OpenApplicationArea"},
{8, nullptr, "GetApplicationArea"},
{9, nullptr, "SetApplicationArea"},
{10, nullptr, "Flush"},
{11, nullptr, "Restore"},
{12, nullptr, "CreateApplicationArea"},
{13, nullptr, "GetTagInfo"},
{14, nullptr, "GetRegisterInfo"},
{15, nullptr, "GetCommonInfo"},
{16, nullptr, "GetModelInfo"},
{17, nullptr, "AttachActivateEvent"},
{18, nullptr, "AttachDeactivateEvent"},
{19, nullptr, "GetState"},
{20, nullptr, "GetDeviceState"},
{21, nullptr, "GetNpadId"},
{22, nullptr, "GetApplicationArea2"},
{23, nullptr, "AttachAvailabilityChangeEvent"},
{24, nullptr, "RecreateApplicationArea"},
};
RegisterHandlers(functions);
}

View File

@@ -26,6 +26,8 @@ u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vecto
return BindChannel(input, output);
case IoctlCommand::IocGetVaRegionsCommand:
return GetVARegions(input, output);
case IoctlCommand::IocUnmapBufferCommand:
return UnmapBuffer(input, output);
}
if (static_cast<IoctlCommand>(command.cmd.Value()) == IoctlCommand::IocRemapCommand)
@@ -125,6 +127,37 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou
params.offset = gpu.memory_manager->MapBufferEx(object->addr, object->size);
}
// Create a new mapping entry for this operation.
ASSERT_MSG(buffer_mappings.find(params.offset) == buffer_mappings.end(),
"Offset is already mapped");
BufferMapping mapping{};
mapping.nvmap_handle = params.nvmap_handle;
mapping.offset = params.offset;
mapping.size = object->size;
buffer_mappings[params.offset] = mapping;
std::memcpy(output.data(), &params, output.size());
return 0;
}
u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlUnmapBuffer params{};
std::memcpy(&params, input.data(), input.size());
NGLOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);
auto& gpu = Core::System::GetInstance().GPU();
auto itr = buffer_mappings.find(params.offset);
ASSERT_MSG(itr != buffer_mappings.end(), "Tried to unmap invalid mapping");
params.offset = gpu.memory_manager->UnmapBuffer(params.offset, itr->second.size);
buffer_mappings.erase(itr->second.offset);
std::memcpy(output.data(), &params, output.size());
return 0;
}
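MapBufferEx records every mapping in buffer_mappings keyed by the returned GPU offset, which is what lets UnmapBuffer recover the size it has to release. A stripped-down sketch of that bookkeeping with a plain std::unordered_map; the names and types are simplified stand-ins, not the emulator's.

#include <cassert>
#include <cstdint>
#include <unordered_map>

struct Mapping {
    std::uint64_t offset;
    std::uint64_t size;
    std::uint32_t nvmap_handle;
};

// Keyed by GPU virtual offset, mirroring buffer_mappings in the diff.
std::unordered_map<std::uint64_t, Mapping> mappings;

void RecordMapping(std::uint64_t offset, std::uint64_t size, std::uint32_t handle) {
    assert(mappings.find(offset) == mappings.end() && "Offset is already mapped");
    mappings[offset] = Mapping{offset, size, handle};
}

std::uint64_t ReleaseMapping(std::uint64_t offset) {
    const auto itr = mappings.find(offset);
    assert(itr != mappings.end() && "Tried to unmap invalid mapping");
    const std::uint64_t size = itr->second.size; // size needed for the actual GPU unmap
    mappings.erase(itr);
    return size;
}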

View File

@@ -5,6 +5,7 @@
#pragma once
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
#include "common/common_types.h"
@@ -30,6 +31,7 @@ private:
IocMapBufferExCommand = 0xC0284106,
IocBindChannelCommand = 0x40044101,
IocGetVaRegionsCommand = 0xC0404108,
IocUnmapBufferCommand = 0xC0084105,
};
struct IoctlInitalizeEx {
@@ -76,6 +78,11 @@ private:
};
static_assert(sizeof(IoctlMapBufferEx) == 40, "IoctlMapBufferEx is incorrect size");
struct IoctlUnmapBuffer {
u64_le offset;
};
static_assert(sizeof(IoctlUnmapBuffer) == 8, "IoctlUnmapBuffer is incorrect size");
struct IoctlBindChannel {
u32_le fd;
};
@@ -98,12 +105,22 @@ private:
static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2,
"IoctlGetVaRegions is incorrect size");
struct BufferMapping {
u64 offset;
u64 size;
u32 nvmap_handle;
};
/// Map containing the nvmap object mappings in GPU memory.
std::unordered_map<u64, BufferMapping> buffer_mappings;
u32 channel{};
u32 InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output);
u32 AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output);
u32 Remap(const std::vector<u8>& input, std::vector<u8>& output);
u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
u32 UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
u32 BindChannel(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetVARegions(const std::vector<u8>& input, std::vector<u8>& output);

View File

@@ -16,7 +16,11 @@ u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<
case IoctlCommand::IocGetConfigCommand:
return NvOsGetConfigU32(input, output);
case IoctlCommand::IocCtrlEventWaitCommand:
return IocCtrlEventWait(input, output);
return IocCtrlEventWait(input, output, false);
case IoctlCommand::IocCtrlEventWaitAsyncCommand:
return IocCtrlEventWait(input, output, true);
case IoctlCommand::IocCtrlEventRegisterCommand:
return IocCtrlEventRegister(input, output);
}
UNIMPLEMENTED_MSG("Unimplemented ioctl");
return 0;
@@ -36,7 +40,7 @@ u32 nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>&
} else if (!strcmp(params.param_str.data(), "NVRM_GPU_PREVENT_USE")) {
params.config_str[0] = '0';
} else {
params.config_str[0] = '0';
params.config_str[0] = '\0';
}
} else {
UNIMPLEMENTED(); // unknown domain? Only nv has been seen so far on hardware
@@ -45,11 +49,13 @@ u32 nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>&
return 0;
}
u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output,
bool is_async) {
IocCtrlEventWaitParams params{};
std::memcpy(&params, input.data(), sizeof(params));
NGLOG_WARNING(Service_NVDRV, "(STUBBED) called, syncpt_id={} threshold={} timeout={}",
params.syncpt_id, params.threshold, params.timeout);
NGLOG_WARNING(Service_NVDRV,
"(STUBBED) called, syncpt_id={}, threshold={}, timeout={}, is_async={}",
params.syncpt_id, params.threshold, params.timeout, is_async);
// TODO(Subv): Implement actual syncpt waiting.
params.value = 0;
@@ -57,4 +63,10 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
return 0;
}
u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) {
NGLOG_WARNING(Service_NVDRV, "(STUBBED) called");
// TODO(bunnei): Implement this.
return 0;
}
} // namespace Service::Nvidia::Devices

View File

@@ -26,12 +26,64 @@ private:
IocSyncptIncrCommand = 0x40040015,
IocSyncptWaitCommand = 0xC00C0016,
IocModuleMutexCommand = 0x40080017,
IocModuleRegRDWRCommand = 0xC008010E,
IocModuleRegRDWRCommand = 0xC0180018,
IocSyncptWaitexCommand = 0xC0100019,
IocSyncptReadMaxCommand = 0xC008001A,
IocCtrlEventWaitCommand = 0xC010001D,
IocGetConfigCommand = 0xC183001B,
IocCtrlEventSignalCommand = 0xC004001C,
IocCtrlEventWaitCommand = 0xC010001D,
IocCtrlEventWaitAsyncCommand = 0xC010001E,
IocCtrlEventRegisterCommand = 0xC004001F,
IocCtrlEventUnregisterCommand = 0xC0040020,
IocCtrlEventKillCommand = 0x40080021,
};
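These command constants appear to follow the Linux ioctl encoding: request number in bits 0-7, device type in bits 8-15, parameter size in bits 16-29, and direction in the top two bits. Decoding IocCtrlEventWaitCommand = 0xC010001D that way gives direction 3 (read/write), size 0x10 (matching the 16-byte IocCtrlEventWaitParams below), type 0x00 and number 0x1D. The sketch illustrates the convention only; it is not code from the driver.

#include <cstdint>
#include <cstdio>

// Decode a Linux-style ioctl command word into its fields.
void DecodeIoctl(std::uint32_t cmd) {
    const std::uint32_t nr = cmd & 0xFF;
    const std::uint32_t type = (cmd >> 8) & 0xFF;
    const std::uint32_t size = (cmd >> 16) & 0x3FFF;
    const std::uint32_t dir = cmd >> 30; // 1 = write, 2 = read, 3 = read/write
    std::printf("nr=0x%02X type=0x%02X size=0x%X dir=%u\n", nr, type, size, dir);
}

int main() {
    DecodeIoctl(0xC010001D); // IocCtrlEventWaitCommand -> nr=0x1D type=0x00 size=0x10 dir=3
    DecodeIoctl(0xC0180018); // IocModuleRegRDWRCommand -> nr=0x18 type=0x00 size=0x18 dir=3
    return 0;
}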
struct IocSyncptReadParams {
u32_le id;
u32_le value;
};
static_assert(sizeof(IocSyncptReadParams) == 8, "IocSyncptReadParams is incorrect size");
struct IocSyncptIncrParams {
u32_le id;
};
static_assert(sizeof(IocSyncptIncrParams) == 4, "IocSyncptIncrParams is incorrect size");
struct IocSyncptWaitParams {
u32_le id;
u32_le thresh;
s32_le timeout;
};
static_assert(sizeof(IocSyncptWaitParams) == 12, "IocSyncptWaitParams is incorrect size");
struct IocModuleMutexParams {
u32_le id;
u32_le lock; // (0 = unlock and 1 = lock)
};
static_assert(sizeof(IocModuleMutexParams) == 8, "IocModuleMutexParams is incorrect size");
struct IocModuleRegRDWRParams {
u32_le id;
u32_le num_offsets;
u32_le block_size;
u32_le offsets;
u32_le values;
u32_le write;
};
static_assert(sizeof(IocModuleRegRDWRParams) == 24, "IocModuleRegRDWRParams is incorrect size");
struct IocSyncptWaitexParams {
u32_le id;
u32_le thresh;
s32_le timeout;
u32_le value;
};
static_assert(sizeof(IocSyncptWaitexParams) == 16, "IocSyncptWaitexParams is incorrect size");
struct IocSyncptReadMaxParams {
u32_le id;
u32_le value;
};
static_assert(sizeof(IocSyncptReadMaxParams) == 8, "IocSyncptReadMaxParams is incorrect size");
struct IocGetConfigParams {
std::array<char, 0x41> domain_str;
@@ -40,6 +92,12 @@ private:
};
static_assert(sizeof(IocGetConfigParams) == 387, "IocGetConfigParams is incorrect size");
struct IocCtrlEventSignalParams {
u32_le user_event_id;
};
static_assert(sizeof(IocCtrlEventSignalParams) == 4,
"IocCtrlEventSignalParams is incorrect size");
struct IocCtrlEventWaitParams {
u32_le syncpt_id;
u32_le threshold;
@@ -48,9 +106,37 @@ private:
};
static_assert(sizeof(IocCtrlEventWaitParams) == 16, "IocCtrlEventWaitParams is incorrect size");
struct IocCtrlEventWaitAsyncParams {
u32_le syncpt_id;
u32_le threshold;
u32_le timeout;
u32_le value;
};
static_assert(sizeof(IocCtrlEventWaitAsyncParams) == 16,
"IocCtrlEventWaitAsyncParams is incorrect size");
struct IocCtrlEventRegisterParams {
u32_le user_event_id;
};
static_assert(sizeof(IocCtrlEventRegisterParams) == 4,
"IocCtrlEventRegisterParams is incorrect size");
struct IocCtrlEventUnregisterParams {
u32_le user_event_id;
};
static_assert(sizeof(IocCtrlEventUnregisterParams) == 4,
"IocCtrlEventUnregisterParams is incorrect size");
struct IocCtrlEventKill {
u64_le user_events;
};
static_assert(sizeof(IocCtrlEventKill) == 8, "IocCtrlEventKill is incorrect size");
u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async);
u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output);
};
} // namespace Service::Nvidia::Devices

View File

@@ -24,6 +24,8 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vec
return ZCullGetCtxSize(input, output);
case IoctlCommand::IocZcullGetInfo:
return ZCullGetInfo(input, output);
case IoctlCommand::IocZbcSetTable:
return ZBCSetTable(input, output);
}
UNIMPLEMENTED_MSG("Unimplemented ioctl");
return 0;
@@ -77,9 +79,13 @@ u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vecto
u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGpuGetTpcMasksArgs params{};
std::memcpy(&params, input.data(), input.size());
NGLOG_WARNING(Service_NVDRV, "(STUBBED) called, mask=0x{:X}, mask_buf_addr=0x{:X}",
params.mask_buf_size, params.mask_buf_addr);
params.unk = 0xcafe; // TODO(ogniK): Needs to be non 0, what does this actually do?
NGLOG_INFO(Service_NVDRV, "called, mask=0x{:X}, mask_buf_addr=0x{:X}", params.mask_buf_size,
params.mask_buf_addr);
// TODO(ogniK): Confirm value on hardware
if (params.mask_buf_size)
params.tpc_mask_size = 4 * 1; // 4 * num_gpc
else
params.tpc_mask_size = 0;
std::memcpy(output.data(), &params, sizeof(params));
return 0;
}
@@ -121,4 +127,13 @@ u32 nvhost_ctrl_gpu::ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>&
return 0;
}
u32 nvhost_ctrl_gpu::ZBCSetTable(const std::vector<u8>& input, std::vector<u8>& output) {
NGLOG_WARNING(Service_NVDRV, "(STUBBED) called");
IoctlZbcSetTable params{};
std::memcpy(&params, input.data(), input.size());
// TODO(ogniK): What does this even actually do?
std::memcpy(output.data(), &params, output.size());
return 0;
}
} // namespace Service::Nvidia::Devices

View File

@@ -25,6 +25,7 @@ private:
IocGetActiveSlotMaskCommand = 0x80084714,
IocZcullGetCtxSizeCommand = 0x80044701,
IocZcullGetInfo = 0x80284702,
IocZbcSetTable = 0x402C4703,
};
struct IoctlGpuCharacteristics {
@@ -86,7 +87,7 @@ private:
/// [in] pointer to TPC mask buffer. It will receive one 32-bit TPC mask per GPC or 0 if
/// GPC is not enabled or not present. This parameter is ignored if mask_buf_size is 0.
u64_le mask_buf_addr;
u64_le unk; // Did Nintendo add this?
u64_le tpc_mask_size; // Did Nintendo add this?
};
static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24,
"IoctlGpuGetTpcMasksArgs is incorrect size");
@@ -117,11 +118,21 @@ private:
static_assert(sizeof(IoctlNvgpuGpuZcullGetInfoArgs) == 40,
"IoctlNvgpuGpuZcullGetInfoArgs is incorrect size");
struct IoctlZbcSetTable {
u32_le color_ds[4];
u32_le color_l2[4];
u32_le depth;
u32_le format;
u32_le type;
};
static_assert(sizeof(IoctlZbcSetTable) == 44, "IoctlZbcSetTable is incorrect size");
u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
u32 ZBCSetTable(const std::vector<u8>& input, std::vector<u8>& output);
};
} // namespace Service::Nvidia::Devices

View File

@@ -34,6 +34,8 @@ u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u
return AllocateObjectContext(input, output);
case IoctlCommand::IocChannelGetWaitbaseCommand:
return GetWaitbase(input, output);
case IoctlCommand::IocChannelSetTimeoutCommand:
return ChannelSetTimeout(input, output);
}
if (command.group == NVGPU_IOCTL_MAGIC) {
@@ -149,4 +151,11 @@ u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& outpu
return 0;
}
u32 nvhost_gpu::ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlChannelSetTimeout params{};
std::memcpy(&params, input.data(), sizeof(IoctlChannelSetTimeout));
NGLOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout);
return 0;
}
} // namespace Service::Nvidia::Devices

View File

@@ -26,14 +26,23 @@ public:
private:
enum class IoctlCommand : u32_le {
IocSetNVMAPfdCommand = 0x40044801,
IocAllocGPFIFOCommand = 0x40084805,
IocSetClientDataCommand = 0x40084714,
IocGetClientDataCommand = 0x80084715,
IocZCullBind = 0xc010480b,
IocSetErrorNotifierCommand = 0xC018480C,
IocChannelSetPriorityCommand = 0x4004480D,
IocEnableCommand = 0x0000480E,
IocDisableCommand = 0x0000480F,
IocPreemptCommand = 0x00004810,
IocForceResetCommand = 0x00004811,
IocEventIdControlCommand = 0x40084812,
IocGetErrorNotificationCommand = 0xC0104817,
IocAllocGPFIFOExCommand = 0x40204818,
IocAllocGPFIFOEx2Command = 0xC020481A,
IocAllocObjCtxCommand = 0xC0104809,
IocChannelGetWaitbaseCommand = 0xC0080003,
IocChannelSetTimeoutCommand = 0x40044803,
};
enum class CtxObjects : u32_le {
@@ -50,6 +59,17 @@ private:
};
static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
struct IoctlChannelSetTimeout {
u32_le timeout;
};
static_assert(sizeof(IoctlChannelSetTimeout) == 4, "IoctlChannelSetTimeout is incorrect size");
struct IoctlAllocGPFIFO {
u32_le num_entries;
u32_le flags;
};
static_assert(sizeof(IoctlAllocGPFIFO) == 8, "IoctlAllocGPFIFO is incorrect size");
struct IoctlClientData {
u64_le data;
};
@@ -70,12 +90,45 @@ private:
};
static_assert(sizeof(IoctlSetErrorNotifier) == 24, "IoctlSetErrorNotifier is incorrect size");
struct IoctlChannelSetPriority {
u32_le priority;
};
static_assert(sizeof(IoctlChannelSetPriority) == 4,
"IoctlChannelSetPriority is incorrect size");
struct IoctlEventIdControl {
u32_le cmd; // 0=disable, 1=enable, 2=clear
u32_le id;
};
static_assert(sizeof(IoctlEventIdControl) == 8, "IoctlEventIdControl is incorrect size");
struct IoctlGetErrorNotification {
u64_le timestamp;
u32_le info32;
u16_le info16;
u16_le status; // always 0xFFFF
};
static_assert(sizeof(IoctlGetErrorNotification) == 16,
"IoctlGetErrorNotification is incorrect size");
struct IoctlFence {
u32_le id;
u32_le value;
};
static_assert(sizeof(IoctlFence) == 8, "IoctlFence is incorrect size");
struct IoctlAllocGpfifoEx {
u32_le num_entries;
u32_le flags;
u32_le unk0;
u32_le unk1;
u32_le unk2;
u32_le unk3;
u32_le unk4;
u32_le unk5;
};
static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size");
struct IoctlAllocGpfifoEx2 {
u32_le num_entries; // in
u32_le flags; // in
@@ -141,6 +194,7 @@ private:
u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output);
std::shared_ptr<nvmap> nvmap_dev;
};

View File

@@ -0,0 +1,32 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
namespace Service::Nvidia::Devices {
u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
NGLOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
command.raw, input.size(), output.size());
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::IocSetNVMAPfdCommand:
return SetNVMAPfd(input, output);
}
UNIMPLEMENTED_MSG("Unimplemented ioctl");
return 0;
}
u32 nvhost_nvdec::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSetNvmapFD params{};
std::memcpy(&params, input.data(), input.size());
NGLOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
nvmap_fd = params.nvmap_fd;
return 0;
}
} // namespace Service::Nvidia::Devices

View File

@@ -0,0 +1,38 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstdlib>
#include <cstring>
#include <vector>
#include "common/common_types.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
namespace Service::Nvidia::Devices {
class nvhost_nvdec final : public nvdevice {
public:
nvhost_nvdec() = default;
~nvhost_nvdec() override = default;
u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
private:
enum class IoctlCommand : u32_le {
IocSetNVMAPfdCommand = 0x40044801,
};
struct IoctlSetNvmapFD {
u32_le nvmap_fd;
};
static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
u32_le nvmap_fd{};
u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
};
} // namespace Service::Nvidia::Devices

View File

@@ -30,6 +30,8 @@ u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& o
return IocFromId(input, output);
case IoctlCommand::Param:
return IocParam(input, output);
case IoctlCommand::Free:
return IocFree(input, output);
}
UNIMPLEMENTED_MSG("Unimplemented ioctl");
@@ -45,6 +47,7 @@ u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
object->id = next_id++;
object->size = params.size;
object->status = Object::Status::Created;
object->refcount = 1;
u32 handle = next_handle++;
handles[handle] = std::move(object);
@@ -101,6 +104,8 @@ u32 nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output) {
[&](const auto& entry) { return entry.second->id == params.id; });
ASSERT(itr != handles.end());
itr->second->refcount++;
// Return the existing handle instead of creating a new one.
params.handle = itr->first;
@@ -114,25 +119,25 @@ u32 nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output) {
IocParamParams params;
std::memcpy(&params, input.data(), sizeof(params));
NGLOG_WARNING(Service_NVDRV, "(STUBBED) called type={}", params.type);
NGLOG_WARNING(Service_NVDRV, "(STUBBED) called type={}", params.param);
auto object = GetObject(params.handle);
ASSERT(object);
ASSERT(object->status == Object::Status::Allocated);
switch (static_cast<ParamTypes>(params.type)) {
switch (static_cast<ParamTypes>(params.param)) {
case ParamTypes::Size:
params.value = object->size;
params.result = object->size;
break;
case ParamTypes::Alignment:
params.value = object->align;
params.result = object->align;
break;
case ParamTypes::Heap:
// TODO(Subv): Seems to be a hardcoded value?
params.value = 0x40000000;
params.result = 0x40000000;
break;
case ParamTypes::Kind:
params.value = object->kind;
params.result = object->kind;
break;
default:
UNIMPLEMENTED();
@@ -142,4 +147,34 @@ u32 nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output) {
return 0;
}
u32 nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
enum FreeFlags {
Freed = 0,
NotFreedYet = 1,
};
IocFreeParams params;
std::memcpy(&params, input.data(), sizeof(params));
NGLOG_WARNING(Service_NVDRV, "(STUBBED) called");
auto itr = handles.find(params.handle);
ASSERT(itr != handles.end());
itr->second->refcount--;
params.refcount = itr->second->refcount;
params.size = itr->second->size;
if (itr->second->refcount == 0)
params.flags = Freed;
else
params.flags = NotFreedYet;
handles.erase(params.handle);
std::memcpy(output.data(), &params, sizeof(params));
return 0;
}
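IocFree drops one reference from the object and reports back whether the memory can be considered freed. A compact standalone model of that decision; the flag names follow the diff, while the real handler additionally erases the handle and echoes the object size back to the caller.

#include <cstdint>
#include <cstdio>

enum FreeFlags : std::uint32_t {
    Freed = 0,
    NotFreedYet = 1,
};

// Returns the flag the guest would see after dropping one reference.
std::uint32_t DropReference(std::uint32_t& refcount) {
    --refcount;
    return refcount == 0 ? Freed : NotFreedYet;
}

int main() {
    std::uint32_t refcount = 2;                   // e.g. one Create plus one FromId
    std::printf("%u\n", DropReference(refcount)); // 1: other references still exist
    std::printf("%u\n", DropReference(refcount)); // 0: last reference gone, memory freed
    return 0;
}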
} // namespace Service::Nvidia::Devices

View File

@@ -34,6 +34,7 @@ public:
u8 kind;
VAddr addr;
Status status;
u32 refcount;
};
std::shared_ptr<Object> GetObject(u32 handle) const {
@@ -58,16 +59,25 @@ private:
Create = 0xC0080101,
FromId = 0xC0080103,
Alloc = 0xC0200104,
Free = 0xC0180105,
Param = 0xC00C0109,
GetId = 0xC008010E
GetId = 0xC008010E,
};
struct IocCreateParams {
// Input
u32_le size;
// Output
u32_le handle;
};
static_assert(sizeof(IocCreateParams) == 8, "IocCreateParams has wrong size");
struct IocFromIdParams {
// Input
u32_le id;
// Output
u32_le handle;
};
static_assert(sizeof(IocFromIdParams) == 8, "IocFromIdParams has wrong size");
struct IocAllocParams {
// Input
@@ -79,6 +89,25 @@ private:
INSERT_PADDING_BYTES(7);
u64_le addr;
};
static_assert(sizeof(IocAllocParams) == 32, "IocAllocParams has wrong size");
struct IocFreeParams {
u32_le handle;
INSERT_PADDING_BYTES(4);
u64_le refcount;
u32_le size;
u32_le flags;
};
static_assert(sizeof(IocFreeParams) == 24, "IocFreeParams has wrong size");
struct IocParamParams {
// Input
u32_le handle;
u32_le param;
// Output
u32_le result;
};
static_assert(sizeof(IocParamParams) == 12, "IocParamParams has wrong size");
struct IocGetIdParams {
// Output
@@ -86,27 +115,14 @@ private:
// Input
u32_le handle;
};
struct IocFromIdParams {
// Input
u32_le id;
// Output
u32_le handle;
};
struct IocParamParams {
// Input
u32_le handle;
u32_le type;
// Output
u32_le value;
};
static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");
u32 IocCreate(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocAlloc(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocGetId(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocFromId(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocParam(const std::vector<u8>& input, std::vector<u8>& output);
u32 IocFree(const std::vector<u8>& input, std::vector<u8>& output);
};
} // namespace Service::Nvidia::Devices

View File

@@ -9,6 +9,7 @@
#include "core/hle/service/nvdrv/devices/nvhost_ctrl.h"
#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
#include "core/hle/service/nvdrv/interface.h"
#include "core/hle/service/nvdrv/nvdrv.h"
@@ -36,6 +37,7 @@ Module::Module() {
devices["/dev/nvmap"] = nvmap_dev;
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev);
devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>();
devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>();
}
u32 Module::Open(std::string device_name) {

View File

@@ -20,6 +20,7 @@
#include "core/hle/service/aoc/aoc_u.h"
#include "core/hle/service/apm/apm.h"
#include "core/hle/service/audio/audio.h"
#include "core/hle/service/bcat/bcat.h"
#include "core/hle/service/fatal/fatal.h"
#include "core/hle/service/filesystem/filesystem.h"
#include "core/hle/service/friend/friend.h"
@@ -144,10 +145,12 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
rb.Push(RESULT_SUCCESS);
return ResultCode(ErrorModule::HIPC, ErrorDescription::RemoteProcessDead);
}
case IPC::CommandType::ControlWithContext:
case IPC::CommandType::Control: {
Core::System::GetInstance().ServiceManager().InvokeControlRequest(context);
break;
}
case IPC::CommandType::RequestWithContext:
case IPC::CommandType::Request: {
InvokeRequest(context);
break;
@@ -181,6 +184,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm) {
AM::InstallInterfaces(*sm, nv_flinger);
AOC::InstallInterfaces(*sm);
APM::InstallInterfaces(*sm);
BCAT::InstallInterfaces(*sm);
Audio::InstallInterfaces(*sm);
Fatal::InstallInterfaces(*sm);
FileSystem::InstallInterfaces(*sm);

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <chrono>
#include <ctime>
#include "common/logging/log.h"
#include "core/core_timing.h"
#include "core/hle/ipc_helpers.h"
@@ -77,7 +78,7 @@ public:
{3, nullptr, "LoadLocationNameList"},
{4, &ITimeZoneService::LoadTimeZoneRule, "LoadTimeZoneRule"},
{5, nullptr, "GetTimeZoneRuleVersion"},
{100, nullptr, "ToCalendarTime"},
{100, &ITimeZoneService::ToCalendarTime, "ToCalendarTime"},
{101, &ITimeZoneService::ToCalendarTimeWithMyRule, "ToCalendarTimeWithMyRule"},
{200, nullptr, "ToPosixTime"},
{201, nullptr, "ToPosixTimeWithMyRule"},
@@ -86,9 +87,11 @@ public:
}
private:
LocationName location_name{"UTC"};
TimeZoneRule my_time_zone_rule{};
void GetDeviceLocationName(Kernel::HLERequestContext& ctx) {
NGLOG_WARNING(Service_Time, "(STUBBED) called");
LocationName location_name{};
NGLOG_DEBUG(Service_Time, "called");
IPC::ResponseBuilder rb{ctx, (sizeof(LocationName) / 4) + 2};
rb.Push(RESULT_SUCCESS);
rb.PushRaw(location_name);
@@ -103,23 +106,70 @@ private:
void LoadTimeZoneRule(Kernel::HLERequestContext& ctx) {
NGLOG_WARNING(Service_Time, "(STUBBED) called");
ctx.WriteBuffer(&my_time_zone_rule, sizeof(TimeZoneRule));
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void ToCalendarTime(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const u64 posix_time = rp.Pop<u64>();
NGLOG_WARNING(Service_Time, "(STUBBED) called, posix_time=0x{:016X}", posix_time);
TimeZoneRule time_zone_rule{};
auto buffer = ctx.ReadBuffer();
std::memcpy(&time_zone_rule, buffer.data(), buffer.size());
CalendarTime calendar_time{2018, 1, 1, 0, 0, 0};
CalendarAdditionalInfo additional_info{};
PosixToCalendar(posix_time, calendar_time, additional_info, time_zone_rule);
IPC::ResponseBuilder rb{ctx, 10};
rb.Push(RESULT_SUCCESS);
rb.PushRaw(calendar_time);
rb.PushRaw(additional_info);
}
void ToCalendarTimeWithMyRule(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
u64 posix_time = rp.Pop<u64>();
const u64 posix_time = rp.Pop<u64>();
NGLOG_WARNING(Service_Time, "(STUBBED) called, posix_time=0x{:016X}", posix_time);
CalendarTime calendar_time{2018, 1, 1, 0, 0, 0};
CalendarAdditionalInfo additional_info{};
PosixToCalendar(posix_time, calendar_time, additional_info, my_time_zone_rule);
IPC::ResponseBuilder rb{ctx, 10};
rb.Push(RESULT_SUCCESS);
rb.PushRaw(calendar_time);
rb.PushRaw(additional_info);
}
void PosixToCalendar(u64 posix_time, CalendarTime& calendar_time,
CalendarAdditionalInfo& additional_info, const TimeZoneRule& /*rule*/) {
std::time_t t(posix_time);
std::tm* tm = std::localtime(&t);
if (!tm) {
return;
}
calendar_time.year = tm->tm_year + 1900;
calendar_time.month = tm->tm_mon + 1;
calendar_time.day = tm->tm_mday;
calendar_time.hour = tm->tm_hour;
calendar_time.minute = tm->tm_min;
calendar_time.second = tm->tm_sec;
additional_info.day_of_week = tm->tm_wday;
additional_info.day_of_year = tm->tm_yday;
std::memcpy(additional_info.name.data(), "UTC", sizeof("UTC"));
additional_info.utc_offset = 0;
}
};
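ToCalendarTime and ToCalendarTimeWithMyRule ultimately just split a POSIX timestamp into calendar fields. As a quick sanity check of the conversion with the standard library, using gmtime here so the result is independent of the host time zone, whereas the handler above goes through localtime:

#include <cstdio>
#include <ctime>

int main() {
    std::time_t posix_time = 1527811200; // 2018-06-01 00:00:00 UTC
    std::tm* tm = std::gmtime(&posix_time);
    if (!tm) {
        return 1;
    }
    std::printf("%04d-%02d-%02d %02d:%02d:%02d (day_of_week=%d, day_of_year=%d)\n",
                tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min,
                tm->tm_sec, tm->tm_wday, tm->tm_yday);
    return 0;
}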
void Module::Interface::GetStandardUserSystemClock(Kernel::HLERequestContext& ctx) {

View File

@@ -4,13 +4,13 @@
#pragma once
#include <array>
#include "core/hle/service/service.h"
namespace Service::Time {
// TODO(Rozelette) RE this structure
struct LocationName {
INSERT_PADDING_BYTES(0x24);
std::array<u8, 0x24> name;
};
static_assert(sizeof(LocationName) == 0x24, "LocationName is incorrect size");
@@ -25,26 +25,34 @@ struct CalendarTime {
};
static_assert(sizeof(CalendarTime) == 0x8, "CalendarTime structure has incorrect size");
// TODO(Rozelette) RE this structure
struct CalendarAdditionalInfo {
INSERT_PADDING_BYTES(0x18);
u32_le day_of_week;
u32_le day_of_year;
std::array<u8, 8> name;
INSERT_PADDING_BYTES(1);
s32_le utc_offset;
};
static_assert(sizeof(CalendarAdditionalInfo) == 0x18,
"CalendarAdditionalInfo structure has incorrect size");
// TODO(bunnei) RE this structure
struct SystemClockContext {
INSERT_PADDING_BYTES(0x20);
// TODO(mailwl) RE this structure
struct TimeZoneRule {
INSERT_PADDING_BYTES(0x4000);
};
static_assert(sizeof(SystemClockContext) == 0x20,
"SystemClockContext structure has incorrect size");
struct SteadyClockTimePoint {
u64 value;
u64_le value;
INSERT_PADDING_WORDS(4);
};
static_assert(sizeof(SteadyClockTimePoint) == 0x18, "SteadyClockTimePoint is incorrect size");
struct SystemClockContext {
u64_le offset;
SteadyClockTimePoint time_point;
};
static_assert(sizeof(SystemClockContext) == 0x20,
"SystemClockContext structure has incorrect size");
class Module final {
public:
class Interface : public ServiceFramework<Interface> {

View File

@@ -28,8 +28,13 @@ static PageTable* current_page_table = nullptr;
void SetCurrentPageTable(PageTable* page_table) {
current_page_table = page_table;
if (Core::System::GetInstance().IsPoweredOn()) {
Core::CPU().PageTableChanged();
auto& system = Core::System::GetInstance();
if (system.IsPoweredOn()) {
system.ArmInterface(0).PageTableChanged();
system.ArmInterface(1).PageTableChanged();
system.ArmInterface(2).PageTableChanged();
system.ArmInterface(3).PageTableChanged();
}
}

View File

@@ -121,6 +121,7 @@ struct Values {
// Core
bool use_cpu_jit;
bool use_multi_core;
// Data Storage
bool use_virtual_sd;

View File

@@ -155,6 +155,8 @@ TelemetrySession::TelemetrySession() {
// Log user configuration information
AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.use_cpu_jit);
AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore",
Settings::values.use_multi_core);
AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor",
Settings::values.resolution_factor);
AddField(Telemetry::FieldType::UserConfig, "Renderer_ToggleFramelimit",

View File

@@ -36,7 +36,6 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
if (method == static_cast<u32>(BufferMethods::BindObject)) {
// Bind the current subchannel to the desired engine id.
NGLOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
ASSERT(bound_engines.find(subchannel) == bound_engines.end());
bound_engines[subchannel] = static_cast<EngineID>(value);
return;
}

View File

@@ -1,4 +1,4 @@
// Copyright 2018 yuzu Emulator Project
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@@ -75,6 +75,10 @@ union Attribute {
enum class Index : u64 {
Position = 7,
Attribute_0 = 8,
// This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
// shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
// shader.
TessCoordInstanceIDVertexID = 47,
};
union {
@@ -193,6 +197,11 @@ union Instruction {
BitField<50, 1, u64> abs_d;
BitField<56, 1, u64> negate_imm;
union {
BitField<39, 3, u64> pred;
BitField<42, 1, u64> negate_pred;
} fmnmx;
float GetImm20_19() const {
float result{};
u32 imm{static_cast<u32>(imm20_19)};
@@ -252,6 +261,19 @@ union Instruction {
BitField<50, 1, u64> saturate_a;
} conversion;
union {
BitField<31, 4, u64> component_mask;
bool IsComponentEnabled(size_t component) const {
return ((1 << component) & component_mask) != 0;
}
} tex;
union {
// TODO(bunnei): This is just a guess, needs to be verified
BitField<52, 1, u64> enable_g_component;
} texs;
BitField<61, 1, u64> is_b_imm;
BitField<60, 1, u64> is_b_gpr;
BitField<59, 1, u64> is_c_gpr;
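The component_mask above selects which of the four texture components are actually written back; the same bit test, written as a self-contained illustration:
constexpr bool IsComponentEnabled(u64 component_mask, std::size_t component) {
    return ((1ull << component) & component_mask) != 0;
}
// A mask of 0b1011 enables components 0, 1 and 3 and skips component 2.
static_assert(IsComponentEnabled(0b1011, 0) && IsComponentEnabled(0b1011, 3) &&
                  !IsComponentEnabled(0b1011, 2),
              "component mask example");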
@@ -272,6 +294,7 @@ public:
KIL,
LD_A,
ST_A,
TEX,
TEXQ, // Texture Query
TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
TLDS, // Texture Load with scalar/non-vec4 source/destinations
@@ -288,8 +311,10 @@ public:
FMUL_R,
FMUL_IMM,
FMUL32_IMM,
MUFU, // Multi-Function Operator
RRO, // Range Reduction Operator
MUFU, // Multi-Function Operator
RRO_C, // Range Reduction Operator
RRO_R,
RRO_IMM,
F2F_C,
F2F_R,
F2F_IMM,
@@ -310,7 +335,9 @@ public:
SHR_C,
SHR_R,
SHR_IMM,
FMNMX,
FMNMX_C,
FMNMX_R,
FMNMX_IMM,
FSETP_C, // Set Predicate
FSETP_R,
FSETP_IMM,
@@ -320,6 +347,7 @@ public:
ISETP_C,
ISETP_IMM,
ISETP_R,
PSETP,
};
enum class Type {
@@ -331,6 +359,7 @@ public:
FloatSet,
FloatSetPredicate,
IntegerSetPredicate,
PredicateSetPredicate,
Conversion,
Unknown,
};
@@ -429,6 +458,7 @@ private:
INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
INST("1100000000111---", Id::TEX, Type::Memory, "TEX"),
INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
@@ -446,10 +476,12 @@ private:
INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0101110010010---", Id::RRO, Type::Arithmetic, "RRO"),
INST("0100110010101---", Id::F2F_C, Type::Arithmetic, "F2F_C"),
INST("0101110010101---", Id::F2F_R, Type::Arithmetic, "F2F_R"),
INST("0011100-10101---", Id::F2F_IMM, Type::Arithmetic, "F2F_IMM"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"),
INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
INST("0100110010110---", Id::F2I_C, Type::Arithmetic, "F2I_C"),
INST("0101110010110---", Id::F2I_R, Type::Arithmetic, "F2I_R"),
INST("0011100-10110---", Id::F2I_IMM, Type::Arithmetic, "F2I_IMM"),
@@ -461,7 +493,9 @@ private:
INST("0100110000101---", Id::SHR_C, Type::Arithmetic, "SHR_C"),
INST("0101110000101---", Id::SHR_R, Type::Arithmetic, "SHR_R"),
INST("0011100-00101---", Id::SHR_IMM, Type::Arithmetic, "SHR_IMM"),
INST("0101110001100---", Id::FMNMX, Type::Arithmetic, "FMNMX"),
INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
INST("01110001-1000---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
@@ -477,6 +511,7 @@ private:
INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"),
INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
};
#undef INST
std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {

View File

@@ -58,6 +58,25 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)
return gpu_addr;
}
GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
ASSERT((gpu_addr & PAGE_MASK) == 0);
for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
ASSERT(PageSlot(gpu_addr + offset) != static_cast<u64>(PageStatus::Allocated) &&
PageSlot(gpu_addr + offset) != static_cast<u64>(PageStatus::Unmapped));
PageSlot(gpu_addr + offset) = static_cast<u64>(PageStatus::Unmapped);
}
// Delete the region mappings that are contained within the unmapped region
mapped_regions.erase(std::remove_if(mapped_regions.begin(), mapped_regions.end(),
[&](const MappedRegion& region) {
return region.gpu_addr <= gpu_addr &&
region.gpu_addr + region.size < gpu_addr + size;
}),
mapped_regions.end());
return gpu_addr;
}
boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
GPUVAddr gpu_addr = 0;
u64 free_space = 0;

View File

@@ -25,6 +25,7 @@ public:
GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
boost::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;

View File

@@ -75,14 +75,11 @@ RasterizerOpenGL::RasterizerOpenGL() {
// Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
state.clip_distance[0] = true;
// Generate VBO, VAO and UBO
vertex_buffer = OGLStreamBuffer::MakeBuffer(GLAD_GL_ARB_buffer_storage, GL_ARRAY_BUFFER);
vertex_buffer->Create(VERTEX_BUFFER_SIZE, VERTEX_BUFFER_SIZE / 2);
// Generate VAO and UBO
sw_vao.Create();
uniform_buffer.Create();
state.draw.vertex_array = sw_vao.handle;
state.draw.vertex_buffer = vertex_buffer->GetHandle();
state.draw.uniform_buffer = uniform_buffer.handle;
state.Apply();
@@ -90,7 +87,6 @@ RasterizerOpenGL::RasterizerOpenGL() {
framebuffer.Create();
hw_vao.Create();
hw_vao_enabled_attributes.fill(false);
stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER);
stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
@@ -181,8 +177,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
glVertexAttribBinding(index, attrib.buffer);
hw_vao_enabled_attributes[index] = true;
}
return {array_ptr, buffer_offset};

View File

@@ -134,21 +134,17 @@ private:
std::unique_ptr<GLShader::ProgramManager> shader_program_manager;
OGLVertexArray sw_vao;
OGLVertexArray hw_vao;
std::array<bool, 16> hw_vao_enabled_attributes;
std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>,
Tegra::Engines::Maxwell3D::Regs::MaxShaderStage>
ssbos;
static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
std::unique_ptr<OGLStreamBuffer> vertex_buffer;
static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
std::unique_ptr<OGLStreamBuffer> stream_buffer;
OGLBuffer uniform_buffer;
OGLFramebuffer framebuffer;
static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024;
std::unique_ptr<OGLStreamBuffer> stream_buffer;
size_t CalculateVertexArraysSize() const;
std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset);

View File

@@ -45,19 +45,23 @@ struct FormatTuple {
static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8
{GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5
{GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, false}, // B5G6R5
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5
{GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
{GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
{GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1
}};
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
if (type == SurfaceType::ColorTexture) {
ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
// For now only UNORM components are supported
ASSERT(component_type == ComponentType::UNorm);
// For now only UNORM components are supported, or RGBA16F which is type FLOAT
ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F);
return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
} else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
// TODO(Subv): Implement depth formats
@@ -107,8 +111,10 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::DXT1>,
MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>,
MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
};
static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
@@ -118,7 +124,11 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
MortonCopy<false, PixelFormat::ABGR8>,
MortonCopy<false, PixelFormat::B5G6R5>,
MortonCopy<false, PixelFormat::A2B10G10R10>,
// TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported
MortonCopy<false, PixelFormat::A1B5G5R5>,
MortonCopy<false, PixelFormat::R8>,
MortonCopy<false, PixelFormat::RGBA16F>,
// TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported
nullptr,
nullptr,
nullptr,
nullptr,

View File

@@ -57,9 +57,13 @@ struct SurfaceParams {
ABGR8 = 0,
B5G6R5 = 1,
A2B10G10R10 = 2,
DXT1 = 3,
DXT23 = 4,
DXT45 = 5,
A1B5G5R5 = 3,
R8 = 4,
RGBA16F = 5,
DXT1 = 6,
DXT23 = 7,
DXT45 = 8,
DXN1 = 9, // This is also known as BC4
Max,
Invalid = 255,
@@ -98,9 +102,13 @@ struct SurfaceParams {
1, // ABGR8
1, // B5G6R5
1, // A2B10G10R10
1, // A1B5G5R5
1, // R8
2, // RGBA16F
4, // DXT1
4, // DXT23
4, // DXT45
4, // DXN1
}};
ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -118,9 +126,13 @@ struct SurfaceParams {
32, // ABGR8
16, // B5G6R5
32, // A2B10G10R10
16, // A1B5G5R5
8, // R8
64, // RGBA16F
64, // DXT1
128, // DXT23
128, // DXT45
64, // DXN1
}};
ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -133,9 +145,12 @@ struct SurfaceParams {
static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
switch (format) {
case Tegra::RenderTargetFormat::RGBA8_UNORM:
case Tegra::RenderTargetFormat::RGBA8_SRGB:
return PixelFormat::ABGR8;
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
return PixelFormat::A2B10G10R10;
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
return PixelFormat::RGBA16F;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -161,12 +176,20 @@ struct SurfaceParams {
return PixelFormat::B5G6R5;
case Tegra::Texture::TextureFormat::A2B10G10R10:
return PixelFormat::A2B10G10R10;
case Tegra::Texture::TextureFormat::A1B5G5R5:
return PixelFormat::A1B5G5R5;
case Tegra::Texture::TextureFormat::R8:
return PixelFormat::R8;
case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
return PixelFormat::RGBA16F;
case Tegra::Texture::TextureFormat::DXT1:
return PixelFormat::DXT1;
case Tegra::Texture::TextureFormat::DXT23:
return PixelFormat::DXT23;
case Tegra::Texture::TextureFormat::DXT45:
return PixelFormat::DXT45;
case Tegra::Texture::TextureFormat::DXN1:
return PixelFormat::DXN1;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -182,12 +205,20 @@ struct SurfaceParams {
return Tegra::Texture::TextureFormat::B5G6R5;
case PixelFormat::A2B10G10R10:
return Tegra::Texture::TextureFormat::A2B10G10R10;
case PixelFormat::A1B5G5R5:
return Tegra::Texture::TextureFormat::A1B5G5R5;
case PixelFormat::R8:
return Tegra::Texture::TextureFormat::R8;
case PixelFormat::RGBA16F:
return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
case PixelFormat::DXT1:
return Tegra::Texture::TextureFormat::DXT1;
case PixelFormat::DXT23:
return Tegra::Texture::TextureFormat::DXT23;
case PixelFormat::DXT45:
return Tegra::Texture::TextureFormat::DXT45;
case PixelFormat::DXN1:
return Tegra::Texture::TextureFormat::DXN1;
default:
UNREACHABLE();
}
@@ -208,8 +239,11 @@ struct SurfaceParams {
// TODO(Subv): Implement more render targets
switch (format) {
case Tegra::RenderTargetFormat::RGBA8_UNORM:
case Tegra::RenderTargetFormat::RGBA8_SRGB:
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
return ComponentType::UNorm;
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
return ComponentType::Float;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();

View File

@@ -299,7 +299,7 @@ public:
* are stored as floats, so this may require conversion.
* @param reg The destination register to use.
* @param elem The element to use for the operation.
* @param attribute The input attibute to use as the source value.
* @param attribute The input attribute to use as the source value.
*/
void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute) {
std::string dest = GetRegisterAsFloat(reg);
@@ -451,6 +451,12 @@ private:
switch (attribute) {
case Attribute::Index::Position:
return "position";
case Attribute::Index::TessCoordInstanceIDVertexID:
// TODO(Subv): Find out what the values are for the first two elements when inside a
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
// shader.
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
return "vec4(0, 0, gl_InstanceID, gl_VertexID)";
default:
const u32 index{static_cast<u32>(attribute) -
static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -580,20 +586,62 @@ private:
* @param instr Instruction to generate the if condition for.
* @returns string containing the predicate condition.
*/
std::string GetPredicateCondition(Instruction instr) const {
std::string GetPredicateCondition(u64 index, bool negate) const {
using Tegra::Shader::Pred;
ASSERT(instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex));
std::string variable;
std::string variable =
'p' + std::to_string(static_cast<u64>(instr.pred.pred_index.Value()));
// Index 7 is used as an 'Always True' condition.
if (index == static_cast<u64>(Pred::UnusedIndex))
variable = "true";
else
variable = 'p' + std::to_string(index);
if (instr.negate_pred) {
if (negate) {
return "!(" + variable + ')';
}
return variable;
}
/**
* Returns the comparison string to use to compare two values in the 'set' family of
* instructions.
* @param condition The condition used in the 'set'-family instruction.
* @returns String corresponding to the GLSL operator that matches the desired comparison.
*/
std::string GetPredicateComparison(Tegra::Shader::PredCondition condition) const {
using Tegra::Shader::PredCondition;
static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
{PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
{PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
{PredCondition::GreaterEqual, ">="},
};
auto comparison = PredicateComparisonStrings.find(condition);
ASSERT_MSG(comparison != PredicateComparisonStrings.end(),
"Unknown predicate comparison operation");
return comparison->second;
}
/**
* Returns the operator string to use to combine two predicates in the 'setp' family of
* instructions.
* @param operation The operator used in the 'setp'-family instruction.
* @returns String corresponding to the GLSL operator that matches the desired operator.
*/
std::string GetPredicateCombiner(Tegra::Shader::PredOperation operation) const {
using Tegra::Shader::PredOperation;
static const std::unordered_map<PredOperation, const char*> PredicateOperationStrings = {
{PredOperation::And, "&&"},
{PredOperation::Or, "||"},
{PredOperation::Xor, "^^"},
};
auto op = PredicateOperationStrings.find(operation);
ASSERT_MSG(op != PredicateOperationStrings.end(), "Unknown predicate operation");
return op->second;
}
/*
* Returns whether the instruction at the specified offset is a 'sched' instruction.
* Sched instructions always appear before a sequence of 3 instructions.
@@ -634,7 +682,9 @@ private:
"NeverExecute predicate not implemented");
if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
shader.AddLine("if (" + GetPredicateCondition(instr) + ')');
shader.AddLine("if (" +
GetPredicateCondition(instr.pred.pred_index, instr.negate_pred != 0) +
')');
shader.AddLine('{');
++shader.scope;
}
@@ -730,8 +780,25 @@ private:
}
break;
}
case OpCode::Id::RRO: {
NGLOG_DEBUG(HW_GPU, "Skipping RRO instruction");
case OpCode::Id::FMNMX_C:
case OpCode::Id::FMNMX_R:
case OpCode::Id::FMNMX_IMM: {
std::string condition =
GetPredicateCondition(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
std::string parameters = op_a + ',' + op_b;
regs.SetRegisterToFloat(instr.gpr0, 0,
'(' + condition + ") ? min(" + parameters + ") : max(" +
parameters + ')',
1, 1);
break;
}
case OpCode::Id::RRO_C:
case OpCode::Id::RRO_R:
case OpCode::Id::RRO_IMM: {
// Currently RRO is only implemented as a register move.
// Usage of `abs_b` and `negate_b` here should also be correct.
regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
NGLOG_WARNING(HW_GPU, "RRO instruction is incomplete");
break;
}
default: {
@@ -778,13 +845,14 @@ private:
}
case OpCode::Type::Conversion: {
ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented");
ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented");
ASSERT_MSG(!instr.conversion.saturate_a, "Unimplemented");
switch (opcode->GetId()) {
case OpCode::Id::I2I_R:
case OpCode::Id::I2F_R: {
ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
std::string op_a =
regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed);
@@ -795,6 +863,16 @@ private:
regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_signed, 0, op_a, 1, 1);
break;
}
case OpCode::Id::F2F_R: {
std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
if (instr.conversion.abs_a) {
op_a = "abs(" + op_a + ')';
}
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
break;
}
default: {
NGLOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName());
UNREACHABLE();
@@ -818,10 +896,10 @@ private:
instr.gpr0);
break;
}
case OpCode::Id::TEXS: {
case OpCode::Id::TEX: {
ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string sampler = GetSampler(instr.sampler);
const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
// Add an extra scope and declare the texture coords inside to prevent overwriting
@@ -830,8 +908,49 @@ private:
++shader.scope;
shader.AddLine(coord);
const std::string texture = "texture(" + sampler + ", coords)";
for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, elem);
size_t dest_elem{};
for (size_t elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
}
regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem);
++dest_elem;
}
--shader.scope;
shader.AddLine("}");
break;
}
case OpCode::Id::TEXS: {
ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
const std::string sampler = GetSampler(instr.sampler);
const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
// Add an extra scope and declare the texture coords inside to prevent
// overwriting them in case they are used as outputs of the texs instruction.
shader.AddLine("{");
++shader.scope;
shader.AddLine(coord);
const std::string texture = "texture(" + sampler + ", coords)";
// TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA goes
// into gpr28+0 and gpr28+1
size_t offset{};
for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) {
for (unsigned elem = 0; elem < 2; ++elem) {
if (dest + elem >= Register::ZeroIndex) {
// Skip invalid register values
break;
}
regs.SetRegisterToFloat(dest, elem + offset, texture, 1, 4, false, elem);
if (!instr.texs.enable_g_component) {
// Skip the second component
break;
}
}
offset += 2;
}
--shader.scope;
shader.AddLine("}");
@@ -873,28 +992,25 @@ private:
}
using Tegra::Shader::Pred;
ASSERT_MSG(instr.fsetp.pred0 == static_cast<u64>(Pred::UnusedIndex) &&
instr.fsetp.pred39 == static_cast<u64>(Pred::UnusedIndex),
"Compound predicates are not implemented");
// We can't use the constant predicate as destination.
ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
using Tegra::Shader::PredCondition;
switch (instr.fsetp.cond) {
case PredCondition::LessThan:
SetPredicate(instr.fsetp.pred3, '(' + op_a + ") < (" + op_b + ')');
break;
case PredCondition::Equal:
SetPredicate(instr.fsetp.pred3, '(' + op_a + ") == (" + op_b + ')');
break;
case PredCondition::LessEqual:
SetPredicate(instr.fsetp.pred3, '(' + op_a + ") <= (" + op_b + ')');
break;
default:
NGLOG_CRITICAL(HW_GPU, "Unhandled predicate condition: {} (a: {}, b: {})",
static_cast<unsigned>(instr.fsetp.cond.Value()), op_a, op_b);
UNREACHABLE();
std::string second_pred =
GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
std::string comparator = GetPredicateComparison(instr.fsetp.cond);
std::string combiner = GetPredicateCombiner(instr.fsetp.op);
std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')';
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.fsetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
// Set the secondary predicate to the result of !Predicate OP SecondPredicate,
// if enabled
SetPredicate(instr.fsetp.pred0,
"!(" + predicate + ") " + combiner + " (" + second_pred + ')');
}
break;
}
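To illustrate how the predicate helpers compose here, the string that ends up in the shader for a LessThan comparison combined with And against an always-true second predicate can be reproduced in isolation (operand names are placeholders):
const std::string op_a = "r5";
const std::string op_b = "r6";
const std::string predicate = '(' + op_a + ") < (" + op_b + ')';
const std::string second_pred = "true"; // Pred::UnusedIndex maps to an always-true condition
const std::string line = "p3 = (" + predicate + ") && (" + second_pred + ");";
// line == "p3 = ((r5) < (r6)) && (true);"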
@@ -926,35 +1042,18 @@ private:
op_b = "abs(" + op_b + ')';
}
using Tegra::Shader::Pred;
ASSERT_MSG(instr.fset.pred39 == static_cast<u64>(Pred::UnusedIndex),
"Compound predicates are not implemented");
// The fset instruction sets a register to 1.0 if the condition is true, and to 0
// otherwise.
using Tegra::Shader::PredCondition;
switch (instr.fset.cond) {
case PredCondition::LessThan:
regs.SetRegisterToFloat(instr.gpr0, 0,
"((" + op_a + ") < (" + op_b + ")) ? 1.0 : 0", 1, 1);
break;
case PredCondition::Equal:
regs.SetRegisterToFloat(instr.gpr0, 0,
"((" + op_a + ") == (" + op_b + ")) ? 1.0 : 0", 1, 1);
break;
case PredCondition::LessEqual:
regs.SetRegisterToFloat(instr.gpr0, 0,
"((" + op_a + ") <= (" + op_b + ")) ? 1.0 : 0", 1, 1);
break;
case PredCondition::GreaterThan:
regs.SetRegisterToFloat(instr.gpr0, 0,
"((" + op_a + ") > (" + op_b + ")) ? 1.0 : 0", 1, 1);
break;
default:
NGLOG_CRITICAL(HW_GPU, "Unhandled predicate condition: {} (a: {}, b: {})",
static_cast<unsigned>(instr.fset.cond.Value()), op_a, op_b);
UNREACHABLE();
}
std::string second_pred =
GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
std::string comparator = GetPredicateComparison(instr.fset.cond);
std::string combiner = GetPredicateCombiner(instr.fset.op);
std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
combiner + " (" + second_pred + "))";
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
break;
}
default: {

View File

@@ -4,6 +4,7 @@
#pragma once
#include <string>
#include <vector>
#include <glad/glad.h>
#include "common/assert.h"
@@ -11,6 +12,27 @@
namespace GLShader {
/**
* Utility function to log the source code of a list of shaders.
* @param shaders The OpenGL shaders whose source we will print.
*/
template <typename... T>
void LogShaderSource(T... shaders) {
auto shader_list = {shaders...};
for (const auto& shader : shader_list) {
if (shader == 0)
continue;
GLint source_length;
glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length);
std::string source(source_length, ' ');
glGetShaderSource(shader, source_length, nullptr, &source[0]);
NGLOG_INFO(Render_OpenGL, "Shader source {}", source);
}
}
/**
* Utility function to create and compile an OpenGL GLSL shader
* @param source String of the GLSL shader program
@@ -55,6 +77,11 @@ GLuint LoadProgram(bool separable_program, T... shaders) {
}
}
if (result == GL_FALSE) {
// There was a problem linking the shader; print the source for debugging purposes.
LogShaderSource(shaders...);
}
ASSERT_MSG(result == GL_TRUE, "Shader not linked");
((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...);

View File

@@ -46,6 +46,7 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_
u32 BytesPerPixel(TextureFormat format) {
switch (format) {
case TextureFormat::DXT1:
case TextureFormat::DXN1:
// In this case a 'pixel' actually refers to a 4x4 tile.
return 8;
case TextureFormat::DXT23:
@@ -55,8 +56,13 @@ u32 BytesPerPixel(TextureFormat format) {
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
return 4;
case TextureFormat::A1B5G5R5:
case TextureFormat::B5G6R5:
return 2;
case TextureFormat::R8:
return 1;
case TextureFormat::R16_G16_B16_A16:
return 8;
default:
UNIMPLEMENTED_MSG("Format not implemented");
break;
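A quick arithmetic check of the tile convention above (dimensions assumed to be multiples of 4):
// DXT1 and DXN1 store each 4x4 texel tile in 8 bytes, so a 64x64 DXT1 texture needs
// (64 / 4) * (64 / 4) * 8 = 2048 bytes.
constexpr u32 CompressedSizeInBytes(u32 width, u32 height, u32 bytes_per_tile) {
    return (width / 4) * (height / 4) * bytes_per_tile;
}
static_assert(CompressedSizeInBytes(64, 64, 8) == 2048, "DXT1 size example");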
@@ -74,13 +80,18 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
case TextureFormat::DXT1:
case TextureFormat::DXT23:
case TextureFormat::DXT45:
// In the DXT formats, each 4x4 tile is swizzled instead of just individual pixel values.
case TextureFormat::DXN1:
// In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel
// values.
CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, block_height);
break;
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::A1B5G5R5:
case TextureFormat::B5G6R5:
case TextureFormat::R8:
case TextureFormat::R16_G16_B16_A16:
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, block_height);
break;
@@ -101,9 +112,12 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
case TextureFormat::DXT1:
case TextureFormat::DXT23:
case TextureFormat::DXT45:
case TextureFormat::DXN1:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::A1B5G5R5:
case TextureFormat::B5G6R5:
case TextureFormat::R8:
// TODO(Subv): For the time being just forward the same data without any decoding.
rgba_data = texture_data;
break;

View File

@@ -14,12 +14,82 @@ namespace Tegra {
namespace Texture {
enum class TextureFormat : u32 {
A8R8G8B8 = 0x8,
A2B10G10R10 = 0x9,
R32_G32_B32_A32 = 0x01,
R32_G32_B32 = 0x02,
R16_G16_B16_A16 = 0x03,
R32_G32 = 0x04,
R32_B24G8 = 0x05,
ETC2_RGB = 0x06,
X8B8G8R8 = 0x07,
A8R8G8B8 = 0x08,
A2B10G10R10 = 0x09,
ETC2_RGB_PTA = 0x0a,
ETC2_RGBA = 0x0b,
R16_G16 = 0x0c,
G8R24 = 0x0d,
G24R8 = 0x0e,
R32 = 0x0f,
BC6H_SF16 = 0x10,
BC6H_UF16 = 0x11,
A4B4G4R4 = 0x12,
A5B5G5R1 = 0x13,
A1B5G5R5 = 0x14,
B5G6R5 = 0x15,
B6G5R5 = 0x16,
BC7U = 0x17,
G8R8 = 0x18,
EAC = 0x19,
EACX2 = 0x1a,
R16 = 0x1b,
Y8_VIDEO = 0x1c,
R8 = 0x1d,
G4R4 = 0x1e,
R1 = 0x1f,
E5B9G9R9_SHAREDEXP = 0x20,
BF10GF11RF11 = 0x21,
G8B8G8R8 = 0x22,
B8G8R8G8 = 0x23,
DXT1 = 0x24,
DXT23 = 0x25,
DXT45 = 0x26,
DXN1 = 0x27,
DXN2 = 0x28,
Z24S8 = 0x29,
X8Z24 = 0x2a,
S8Z24 = 0x2b,
X4V4Z24__COV4R4V = 0x2c,
X4V4Z24__COV8R8V = 0x2d,
V8Z24__COV4R12V = 0x2e,
ZF32 = 0x2f,
ZF32_X24S8 = 0x30,
X8Z24_X20V4S8__COV4R4V = 0x31,
X8Z24_X20V4S8__COV8R8V = 0x32,
ZF32_X20V4X8__COV4R4V = 0x33,
ZF32_X20V4X8__COV8R8V = 0x34,
ZF32_X20V4S8__COV4R4V = 0x35,
ZF32_X20V4S8__COV8R8V = 0x36,
X8Z24_X16V8S8__COV4R12V = 0x37,
ZF32_X16V8X8__COV4R12V = 0x38,
ZF32_X16V8S8__COV4R12V = 0x39,
Z16 = 0x3a,
V8Z24__COV8R24V = 0x3b,
X8Z24_X16V8S8__COV8R24V = 0x3c,
ZF32_X16V8X8__COV8R24V = 0x3d,
ZF32_X16V8S8__COV8R24V = 0x3e,
ASTC_2D_4X4 = 0x40,
ASTC_2D_5X5 = 0x41,
ASTC_2D_6X6 = 0x42,
ASTC_2D_8X8 = 0x44,
ASTC_2D_10X10 = 0x45,
ASTC_2D_12X12 = 0x46,
ASTC_2D_5X4 = 0x50,
ASTC_2D_6X5 = 0x51,
ASTC_2D_8X6 = 0x52,
ASTC_2D_10X8 = 0x53,
ASTC_2D_12X10 = 0x54,
ASTC_2D_8X5 = 0x55,
ASTC_2D_10X5 = 0x56,
ASTC_2D_10X6 = 0x57,
};
enum class TextureType : u32 {

View File

@@ -78,6 +78,7 @@ void Config::ReadValues() {
qt_config->beginGroup("Core");
Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool();
Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool();
qt_config->endGroup();
qt_config->beginGroup("Renderer");
@@ -177,6 +178,7 @@ void Config::SaveValues() {
qt_config->beginGroup("Core");
qt_config->setValue("use_cpu_jit", Settings::values.use_cpu_jit);
qt_config->setValue("use_multi_core", Settings::values.use_multi_core);
qt_config->endGroup();
qt_config->beginGroup("Renderer");

View File

@@ -20,6 +20,7 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
this->setConfiguration();
ui->use_cpu_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn());
ui->use_multi_core->setEnabled(!Core::System::GetInstance().IsPoweredOn());
ui->use_docked_mode->setEnabled(!Core::System::GetInstance().IsPoweredOn());
}
@@ -30,6 +31,7 @@ void ConfigureGeneral::setConfiguration() {
ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing);
ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme));
ui->use_cpu_jit->setChecked(Settings::values.use_cpu_jit);
ui->use_multi_core->setChecked(Settings::values.use_multi_core);
ui->use_docked_mode->setChecked(Settings::values.use_docked_mode);
}
@@ -40,6 +42,7 @@ void ConfigureGeneral::applyConfiguration() {
ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString();
Settings::values.use_cpu_jit = ui->use_cpu_jit->isChecked();
Settings::values.use_multi_core = ui->use_multi_core->isChecked();
Settings::values.use_docked_mode = ui->use_docked_mode->isChecked();
Settings::Apply();
}

View File

@@ -58,6 +58,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="use_multi_core">
<property name="text">
<string>Enable multi-core</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>

View File

@@ -63,7 +63,7 @@ void RegistersWidget::OnDebugModeEntered() {
for (int i = 0; i < core_registers->childCount(); ++i)
core_registers->child(i)->setText(
1, QString("0x%1").arg(Core::CPU().GetReg(i), 8, 16, QLatin1Char('0')));
1, QString("0x%1").arg(Core::CurrentArmInterface().GetReg(i), 8, 16, QLatin1Char('0')));
UpdateCPSRValues();
}
@@ -122,7 +122,7 @@ void RegistersWidget::CreateCPSRChildren() {
}
void RegistersWidget::UpdateCPSRValues() {
const u32 cpsr_val = Core::CPU().GetCPSR();
const u32 cpsr_val = Core::CurrentArmInterface().GetCPSR();
cpsr->setText(1, QString("0x%1").arg(cpsr_val, 8, 16, QLatin1Char('0')));
cpsr->child(0)->setText(

View File

@@ -51,13 +51,21 @@ std::size_t WaitTreeItem::Row() const {
}
std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList() {
const auto& threads = Core::System::GetInstance().Scheduler().GetThreadList();
std::vector<std::unique_ptr<WaitTreeThread>> item_list;
item_list.reserve(threads.size());
for (std::size_t i = 0; i < threads.size(); ++i) {
item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i]));
item_list.back()->row = i;
}
std::size_t row = 0;
auto add_threads = [&](const std::vector<Kernel::SharedPtr<Kernel::Thread>>& threads) {
for (std::size_t i = 0; i < threads.size(); ++i) {
item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i]));
item_list.back()->row = row;
++row;
}
};
add_threads(Core::System::GetInstance().Scheduler(0)->GetThreadList());
add_threads(Core::System::GetInstance().Scheduler(1)->GetThreadList());
add_threads(Core::System::GetInstance().Scheduler(2)->GetThreadList());
add_threads(Core::System::GetInstance().Scheduler(3)->GetThreadList());
return item_list;
}
@@ -90,6 +98,30 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeMutexInfo::GetChildren() cons
return list;
}
WaitTreeCallstack::WaitTreeCallstack(const Kernel::Thread& thread) : thread(thread) {}
QString WaitTreeCallstack::GetText() const {
return tr("Call stack");
}
std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() const {
std::vector<std::unique_ptr<WaitTreeItem>> list;
constexpr size_t BaseRegister = 29;
u64 base_pointer = thread.context.cpu_registers[BaseRegister];
while (base_pointer != 0) {
u64 lr = Memory::Read64(base_pointer + sizeof(u64));
if (lr == 0)
break;
list.push_back(
std::make_unique<WaitTreeText>(tr("0x%1").arg(lr - sizeof(u32), 16, 16, QChar('0'))));
base_pointer = Memory::Read64(base_pointer);
}
return list;
}
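The loop above walks the standard AArch64 frame-record chain; the layout it assumes, spelled out for reference:
// AArch64 frame record assumed by the walk: register 29 (the frame pointer) holds the
// address of a pair of 64-bit values.
struct FrameRecord {
    u64 previous_fp;     // read via Memory::Read64(base_pointer)
    u64 return_address;  // read via Memory::Read64(base_pointer + sizeof(u64))
};
// The displayed address is lr - sizeof(u32), i.e. the call instruction rather than the
// return address itself.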
WaitTreeWaitObject::WaitTreeWaitObject(const Kernel::WaitObject& o) : object(o) {}
bool WaitTreeExpandableItem::IsExpandable() const {
@@ -240,6 +272,9 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
}
list.push_back(std::make_unique<WaitTreeText>(tr("processor = %1").arg(processor)));
list.push_back(std::make_unique<WaitTreeText>(tr("ideal core = %1").arg(thread.ideal_core)));
list.push_back(
std::make_unique<WaitTreeText>(tr("affinity mask = %1").arg(thread.affinity_mask)));
list.push_back(std::make_unique<WaitTreeText>(tr("thread id = %1").arg(thread.GetThreadId())));
list.push_back(std::make_unique<WaitTreeText>(tr("priority = %1(current) / %2(normal)")
.arg(thread.current_priority)
@@ -258,6 +293,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
thread.IsSleepingOnWaitAll()));
}
list.push_back(std::make_unique<WaitTreeCallstack>(thread));
return list;
}

View File

@@ -73,6 +73,17 @@ private:
Kernel::SharedPtr<Kernel::Thread> owner;
};
class WaitTreeCallstack : public WaitTreeExpandableItem {
Q_OBJECT
public:
explicit WaitTreeCallstack(const Kernel::Thread& thread);
QString GetText() const override;
std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
private:
const Kernel::Thread& thread;
};
class WaitTreeWaitObject : public WaitTreeExpandableItem {
Q_OBJECT
public:

View File

@@ -91,6 +91,7 @@ void Config::ReadValues() {
// Core
Settings::values.use_cpu_jit = sdl2_config->GetBoolean("Core", "use_cpu_jit", true);
Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
// Renderer
Settings::values.resolution_factor =

View File

@@ -80,6 +80,10 @@ touch_device=
# 0: Interpreter (slow), 1 (default): JIT (fast)
use_cpu_jit =
# Whether to use multi-core for CPU emulation
# 0 (default): Disabled, 1: Enabled
use_multi_core=
[Renderer]
# Whether to use software or hardware rendering.
# 0: Software, 1 (default): Hardware

View File

@@ -7,8 +7,19 @@
#include <string>
#include <thread>
// This needs to be included before getopt.h because the latter #defines symbols used by it
#include "common/logging/backend.h"
#include "common/logging/filter.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/scm_rev.h"
#include "common/scope_exit.h"
#include "common/string_util.h"
#include "core/core.h"
#include "core/gdbstub/gdbstub.h"
#include "core/loader/loader.h"
#include "core/settings.h"
#include "yuzu_cmd/config.h"
#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
#ifdef _MSC_VER
#include <getopt.h>
@@ -24,19 +35,6 @@
#include <shellapi.h>
#endif
#include "common/logging/backend.h"
#include "common/logging/filter.h"
#include "common/logging/log.h"
#include "common/scm_rev.h"
#include "common/scope_exit.h"
#include "common/string_util.h"
#include "core/core.h"
#include "core/gdbstub/gdbstub.h"
#include "core/loader/loader.h"
#include "core/settings.h"
#include "yuzu_cmd/config.h"
#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
#ifdef _WIN32
extern "C" {
// tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable