Compare commits

...

9 Commits

Author SHA1 Message Date
yuzubot e397cf57b5 "Merge PR 1012" 2019-09-24 12:01:23 +00:00
yuzubot e51659c11e "Merge PR 1340" 2019-09-24 12:01:22 +00:00
yuzubot 782f9248c2 "Merge PR 1703" 2019-09-24 12:01:22 +00:00
yuzubot b4613f538a "Merge PR 2365" 2019-09-24 12:01:21 +00:00
yuzubot 440a1d8de0 "Merge PR 2542" 2019-09-24 12:01:20 +00:00
yuzubot 89e1819037 "Merge PR 2574" 2019-09-24 12:01:19 +00:00
yuzubot 4427f8b29a "Merge PR 2710" 2019-09-24 12:01:18 +00:00
yuzubot 1d86b7557a "Merge PR 2859" 2019-09-24 12:01:17 +00:00
yuzubot 78304982fe "Merge PR 2860" 2019-09-24 12:01:17 +00:00
64 changed files with 2420 additions and 758 deletions

View File

@@ -28,18 +28,14 @@ __declspec(noinline, noreturn)
}
#define ASSERT(_a_) \
do \
if (!(_a_)) { \
assert_noinline_call([] { LOG_CRITICAL(Debug, "Assertion Failed!"); }); \
} \
while (0)
if (!(_a_)) { \
LOG_CRITICAL(Debug, "Assertion Failed!"); \
}
#define ASSERT_MSG(_a_, ...) \
do \
if (!(_a_)) { \
assert_noinline_call([&] { LOG_CRITICAL(Debug, "Assertion Failed!\n" __VA_ARGS__); }); \
} \
while (0)
if (!(_a_)) { \
LOG_CRITICAL(Debug, "Assertion Failed! " __VA_ARGS__); \
}
#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!")
#define UNREACHABLE_MSG(...) ASSERT_MSG(false, __VA_ARGS__)
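The new expansions drop the do { ... } while (0) wrapper along with the assert_noinline_call helper, so ASSERT and ASSERT_MSG now expand to a bare if block. One side effect worth noting (the caller below is purely illustrative, not from this changeset): a brace-only macro no longer acts as a single statement inside an if/else chain, because the semicolon after the invocation orphans the else.

    // Expands to `if (ready) if (!(ok)) { ... } ; else ...` -- the empty statement
    // after the block leaves the `else` unmatched, so this no longer compiles.
    if (ready)
        ASSERT(ok);
    else
        HandleNotReady(); // hypothetical fallback, for illustration only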

View File

@@ -36,6 +36,7 @@
#define LOAD_DIR "load"
#define DUMP_DIR "dump"
#define SHADER_DIR "shader"
#define RESCALING_DIR "rescaling"
#define LOG_DIR "log"
// Filenames

View File

@@ -695,6 +695,7 @@ const std::string& GetUserPath(UserPath path, const std::string& new_path) {
paths.emplace(UserPath::LoadDir, user_path + LOAD_DIR DIR_SEP);
paths.emplace(UserPath::DumpDir, user_path + DUMP_DIR DIR_SEP);
paths.emplace(UserPath::ShaderDir, user_path + SHADER_DIR DIR_SEP);
paths.emplace(UserPath::RescalingDir, user_path + RESCALING_DIR DIR_SEP);
paths.emplace(UserPath::SysDataDir, user_path + SYSDATA_DIR DIR_SEP);
paths.emplace(UserPath::KeysDir, user_path + KEYS_DIR DIR_SEP);
// TODO: Put the logs in a better location for each OS
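Callers can resolve the new directory through the same GetUserPath accessor shown above; a minimal sketch, assuming the FileUtil namespace used elsewhere in the codebase:

    // Resolve the newly added rescaling directory (namespace assumed; the accessor
    // signature matches the GetUserPath shown in the hunk above).
    const std::string& rescaling_dir = FileUtil::GetUserPath(FileUtil::UserPath::RescalingDir);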

View File

@@ -33,6 +33,7 @@ enum class UserPath {
LoadDir,
DumpDir,
ShaderDir,
RescalingDir,
SysDataDir,
UserDir,
};

View File

@@ -248,6 +248,9 @@ void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) {
}
void ARM_Dynarmic::PrepareReschedule() {
if (jit == nullptr)
return;
jit->HaltExecution();
}

View File

@@ -399,6 +399,12 @@ void System::PrepareReschedule() {
CurrentCpuCore().PrepareReschedule();
}
void System::PrepareReschedule(const u32 core_index) {
if (core_index < GlobalScheduler().CpuCoresCount()) {
CpuCore(core_index).PrepareReschedule();
}
}
PerfStatsResults System::GetAndResetPerfStats() {
return impl->GetAndResetPerfStats();
}
@@ -439,6 +445,16 @@ const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const {
return CpuCore(core_index).Scheduler();
}
/// Gets the global scheduler
Kernel::GlobalScheduler& System::GlobalScheduler() {
return impl->kernel.GlobalScheduler();
}
/// Gets the global scheduler
const Kernel::GlobalScheduler& System::GlobalScheduler() const {
return impl->kernel.GlobalScheduler();
}
Kernel::Process* System::CurrentProcess() {
return impl->kernel.CurrentProcess();
}
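The per-core PrepareReschedule overload checks the index against GlobalScheduler().CpuCoresCount(), so callers can hand it a thread's processor ID directly and out-of-range values (including an unassigned -1 converted to u32) simply result in no reschedule. The svc.cpp hunks later in this compare use it exactly that way:

    // Typical call site (taken from the later hunks); no bounds check needed by the caller.
    system.PrepareReschedule(thread->GetProcessorID());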

View File

@@ -25,6 +25,7 @@ class VfsFilesystem;
} // namespace FileSys
namespace Kernel {
class GlobalScheduler;
class KernelCore;
class Process;
class Scheduler;
@@ -179,6 +180,9 @@ public:
/// Prepare the core emulation for a reschedule
void PrepareReschedule();
/// Prepare the core emulation for a reschedule
void PrepareReschedule(u32 core_index);
/// Gets and resets core performance statistics
PerfStatsResults GetAndResetPerfStats();
@@ -233,6 +237,12 @@ public:
/// Gets the scheduler for the CPU core with the specified index
const Kernel::Scheduler& Scheduler(std::size_t core_index) const;
/// Gets the global scheduler
Kernel::GlobalScheduler& GlobalScheduler();
/// Gets the global scheduler
const Kernel::GlobalScheduler& GlobalScheduler() const;
/// Provides a pointer to the current process
Kernel::Process* CurrentProcess();

View File

@@ -52,7 +52,8 @@ bool CpuBarrier::Rendezvous() {
Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
std::size_t core_index)
: cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
: cpu_barrier{cpu_barrier}, global_scheduler{system.GlobalScheduler()},
core_timing{system.CoreTiming()}, core_index{core_index} {
#ifdef ARCHITECTURE_x86_64
arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index);
#else
@@ -60,7 +61,7 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba
LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
#endif
scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index);
}
Cpu::~Cpu() = default;
@@ -81,6 +82,8 @@ void Cpu::RunLoop(bool tight_loop) {
return;
}
Reschedule();
// If we don't have a currently active thread then don't execute instructions,
// instead advance to the next event and try to yield to the next thread
if (Kernel::GetCurrentThread() == nullptr) {
@@ -92,7 +95,6 @@ void Cpu::RunLoop(bool tight_loop) {
core_timing.Advance();
}
PrepareReschedule();
} else {
if (IsMainCore()) {
core_timing.Advance();
@@ -114,18 +116,14 @@ void Cpu::SingleStep() {
void Cpu::PrepareReschedule() {
arm_interface->PrepareReschedule();
reschedule_pending = true;
}
void Cpu::Reschedule() {
if (!reschedule_pending) {
return;
}
reschedule_pending = false;
// Lock the global kernel mutex when we manipulate the HLE state
std::lock_guard lock{HLE::g_hle_lock};
scheduler->Reschedule();
std::lock_guard lock(HLE::g_hle_lock);
global_scheduler.SelectThread(core_index);
scheduler->TryDoContextSwitch();
}
} // namespace Core

View File

@@ -12,8 +12,9 @@
#include "common/common_types.h"
namespace Kernel {
class GlobalScheduler;
class Scheduler;
}
} // namespace Kernel
namespace Core {
class System;
@@ -90,6 +91,7 @@ private:
std::unique_ptr<ARM_Interface> arm_interface;
CpuBarrier& cpu_barrier;
Kernel::GlobalScheduler& global_scheduler;
std::unique_ptr<Kernel::Scheduler> scheduler;
Timing::CoreTiming& core_timing;

View File

@@ -202,13 +202,11 @@ void RegisterModule(std::string name, VAddr beg, VAddr end, bool add_elf_ext) {
}
static Kernel::Thread* FindThreadById(s64 id) {
for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) {
const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList();
for (auto& thread : threads) {
if (thread->GetThreadID() == static_cast<u64>(id)) {
current_core = core;
return thread.get();
}
const auto& threads = Core::System::GetInstance().GlobalScheduler().GetThreadList();
for (auto& thread : threads) {
if (thread->GetThreadID() == static_cast<u64>(id)) {
current_core = thread->GetProcessorID();
return thread.get();
}
}
return nullptr;
@@ -646,11 +644,9 @@ static void HandleQuery() {
SendReply(buffer.c_str());
} else if (strncmp(query, "fThreadInfo", strlen("fThreadInfo")) == 0) {
std::string val = "m";
for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) {
const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList();
for (const auto& thread : threads) {
val += fmt::format("{:x},", thread->GetThreadID());
}
const auto& threads = Core::System::GetInstance().GlobalScheduler().GetThreadList();
for (const auto& thread : threads) {
val += fmt::format("{:x},", thread->GetThreadID());
}
val.pop_back();
SendReply(val.c_str());
@@ -660,13 +656,11 @@ static void HandleQuery() {
std::string buffer;
buffer += "l<?xml version=\"1.0\"?>";
buffer += "<threads>";
for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) {
const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList();
for (const auto& thread : threads) {
buffer +=
fmt::format(R"*(<thread id="{:x}" core="{:d}" name="Thread {:x}"></thread>)*",
thread->GetThreadID(), core, thread->GetThreadID());
}
const auto& threads = Core::System::GetInstance().GlobalScheduler().GetThreadList();
for (const auto& thread : threads) {
buffer +=
fmt::format(R"*(<thread id="{:x}" core="{:d}" name="Thread {:x}"></thread>)*",
thread->GetThreadID(), thread->GetProcessorID(), thread->GetThreadID());
}
buffer += "</threads>";
SendReply(buffer.c_str());

View File

@@ -22,6 +22,7 @@ namespace Kernel {
namespace {
// Wake up num_to_wake (or all) threads in a vector.
void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
auto& system = Core::System::GetInstance();
// Only process up to 'target' threads, unless 'target' is <= 0, in which case process
// them all.
std::size_t last = waiting_threads.size();
@@ -35,6 +36,7 @@ void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_
waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
waiting_threads[i]->SetArbiterWaitAddress(0);
waiting_threads[i]->ResumeFromWait();
system.PrepareReschedule(waiting_threads[i]->GetProcessorID());
}
}
} // Anonymous namespace
@@ -169,30 +171,22 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
current_thread->WakeAfterDelay(timeout);
system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
system.PrepareReschedule(current_thread->GetProcessorID());
return RESULT_TIMEOUT;
}
std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
const auto RetrieveWaitingThreads = [this](std::size_t core_index,
std::vector<SharedPtr<Thread>>& waiting_threads,
VAddr arb_addr) {
const auto& scheduler = system.Scheduler(core_index);
const auto& thread_list = scheduler.GetThreadList();
for (const auto& thread : thread_list) {
if (thread->GetArbiterWaitAddress() == arb_addr) {
waiting_threads.push_back(thread);
}
}
};
// Retrieve all threads that are waiting for this address.
std::vector<SharedPtr<Thread>> threads;
RetrieveWaitingThreads(0, threads, address);
RetrieveWaitingThreads(1, threads, address);
RetrieveWaitingThreads(2, threads, address);
RetrieveWaitingThreads(3, threads, address);
const auto& scheduler = system.GlobalScheduler();
const auto& thread_list = scheduler.GetThreadList();
for (const auto& thread : thread_list) {
if (thread->GetArbiterWaitAddress() == address) {
threads.push_back(thread);
}
}
// Sort them by priority, such that the highest priority ones come first.
std::sort(threads.begin(), threads.end(),

View File

@@ -18,6 +18,7 @@
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/lock.h"
#include "core/hle/result.h"
@@ -88,7 +89,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
}
struct KernelCore::Impl {
explicit Impl(Core::System& system) : system{system} {}
explicit Impl(Core::System& system) : system{system}, global_scheduler{system} {}
void Initialize(KernelCore& kernel) {
Shutdown();
@@ -140,6 +141,7 @@ struct KernelCore::Impl {
// Lists all processes that exist in the current session.
std::vector<SharedPtr<Process>> process_list;
Process* current_process = nullptr;
Kernel::GlobalScheduler global_scheduler;
SharedPtr<ResourceLimit> system_resource_limit;
@@ -203,6 +205,14 @@ const std::vector<SharedPtr<Process>>& KernelCore::GetProcessList() const {
return impl->process_list;
}
Kernel::GlobalScheduler& KernelCore::GlobalScheduler() {
return impl->global_scheduler;
}
const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const {
return impl->global_scheduler;
}
void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) {
impl->named_ports.emplace(std::move(name), std::move(port));
}

View File

@@ -25,6 +25,7 @@ class HandleTable;
class Process;
class ResourceLimit;
class Thread;
class GlobalScheduler;
/// Represents a single instance of the kernel.
class KernelCore {
@@ -75,6 +76,12 @@ public:
/// Retrieves the list of processes.
const std::vector<SharedPtr<Process>>& GetProcessList() const;
/// Gets the sole instance of the global scheduler
Kernel::GlobalScheduler& GlobalScheduler();
/// Gets the sole instance of the global scheduler
const Kernel::GlobalScheduler& GlobalScheduler() const;
/// Adds a port to the named port table
void AddNamedPort(std::string name, SharedPtr<ClientPort> port);

View File

@@ -7,6 +7,7 @@
#include "common/assert.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
@@ -78,7 +79,7 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
// thread.
ASSERT(requesting_thread == current_thread);
const u32 addr_value = Memory::Read32(address);
u32 addr_value = Memory::Read32(address);
// If the mutex isn't being held, just return success.
if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
@@ -89,6 +90,20 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
return ERR_INVALID_HANDLE;
}
// This is a workaround where an unknown bug writes the mutex value to give ownership to a cond var
// waiting thread.
if (holding_thread->GetStatus() == ThreadStatus::WaitCondVar) {
if (holding_thread->GetMutexWaitAddress() == address) {
Release(address, holding_thread.get());
addr_value = Memory::Read32(address);
if (addr_value == 0)
return RESULT_SUCCESS;
else {
holding_thread = handle_table.Get<Thread>(addr_value & Mutex::MutexOwnerMask);
}
}
}
// Wait until the mutex is released
current_thread->SetMutexWaitAddress(address);
current_thread->SetWaitHandle(requesting_thread_handle);
@@ -104,14 +119,13 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
return RESULT_SUCCESS;
}
ResultCode Mutex::Release(VAddr address) {
ResultCode Mutex::Release(VAddr address, Thread* holding_thread) {
// The mutex address must be 4-byte aligned
if ((address % sizeof(u32)) != 0) {
return ERR_INVALID_ADDRESS;
}
auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(current_thread, address);
auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(holding_thread, address);
// There are no more threads waiting for the mutex, release it completely.
if (thread == nullptr) {
@@ -120,7 +134,7 @@ ResultCode Mutex::Release(VAddr address) {
}
// Transfer the ownership of the mutex from the previous owner to the new one.
TransferMutexOwnership(address, current_thread, thread);
TransferMutexOwnership(address, holding_thread, thread);
u32 mutex_value = thread->GetWaitHandle();
@@ -140,6 +154,11 @@ ResultCode Mutex::Release(VAddr address) {
thread->SetMutexWaitAddress(0);
thread->SetWaitHandle(0);
if (thread->GetProcessorID() >= 0)
system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
if (holding_thread->GetProcessorID() >= 0)
system.CpuCore(holding_thread->GetProcessorID()).PrepareReschedule();
return RESULT_SUCCESS;
}
} // namespace Kernel
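For reference when reading the workaround above: the 32-bit mutex word packs the owner's handle together with a waiters bit, which is why TryAcquire compares it against (holding_thread_handle | Mutex::MutexHasWaitersFlag). A small sketch of how the value decomposes, using the masks referenced in this hunk (their numeric values live in mutex.h and are assumed here, not shown in this diff):

    const u32 addr_value = Memory::Read32(address);
    // Owner handle occupies the bits selected by MutexOwnerMask.
    const Handle owner_handle = addr_value & Mutex::MutexOwnerMask;
    // The remaining bit (assumed 0x40000000) marks that other threads are waiting.
    const bool has_waiters = (addr_value & Mutex::MutexHasWaitersFlag) != 0;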

View File

@@ -29,7 +29,7 @@ public:
Handle requesting_thread_handle);
/// Releases the mutex at the specified address.
ResultCode Release(VAddr address);
ResultCode Release(VAddr address, Thread* holding_thread);
private:
Core::System& system;

View File

@@ -213,10 +213,7 @@ void Process::PrepareForTermination() {
}
};
stop_threads(system.Scheduler(0).GetThreadList());
stop_threads(system.Scheduler(1).GetThreadList());
stop_threads(system.Scheduler(2).GetThreadList());
stop_threads(system.Scheduler(3).GetThreadList());
stop_threads(system.GlobalScheduler().GetThreadList());
FreeTLSRegion(tls_region_address);
tls_region_address = 0;

View File

@@ -1,8 +1,13 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
//
// SelectThreads, Yield functions originally by TuxSH.
// licensed under GPLv2 or later under exception provided by the author.
#include <algorithm>
#include <set>
#include <unordered_set>
#include <utility>
#include "common/assert.h"
@@ -17,56 +22,277 @@
namespace Kernel {
std::mutex Scheduler::scheduler_mutex;
GlobalScheduler::GlobalScheduler(Core::System& system) : system{system} {
reselection_pending = false;
}
Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
: cpu_core{cpu_core}, system{system} {}
void GlobalScheduler::AddThread(SharedPtr<Thread> thread) {
thread_list.push_back(std::move(thread));
}
Scheduler::~Scheduler() {
for (auto& thread : thread_list) {
thread->Stop();
void GlobalScheduler::RemoveThread(const Thread* thread) {
thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
thread_list.end());
}
/*
* UnloadThread selects a core and forces it to unload its current thread's context
*/
void GlobalScheduler::UnloadThread(s32 core) {
Scheduler& sched = system.Scheduler(core);
sched.UnloadThread();
}
/*
* SelectThread takes care of selecting the new scheduled thread.
* It does it in 3 steps:
* - First, a thread is selected from the top of the priority queue. If no thread
* is obtained, we move on to step two; otherwise we are done.
* - Second, we try to get a suggested thread that's not assigned to any core or
* that is not the top thread in that core.
* - Third, if no suggested thread is found, we do a second pass and pick a running
* thread in another core and swap it with its current thread.
*/
void GlobalScheduler::SelectThread(u32 core) {
const auto update_thread = [](Thread* thread, Scheduler& sched) {
if (thread != sched.selected_thread) {
if (thread == nullptr) {
++sched.idle_selection_count;
}
sched.selected_thread = thread;
}
sched.context_switch_pending = sched.selected_thread != sched.current_thread;
std::atomic_thread_fence(std::memory_order_seq_cst);
};
Scheduler& sched = system.Scheduler(core);
Thread* current_thread = nullptr;
// Step 1: Get top thread in schedule queue.
current_thread = scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front();
if (current_thread) {
update_thread(current_thread, sched);
return;
}
// Step 2: Try selecting a suggested thread.
Thread* winner = nullptr;
std::set<s32> sug_cores;
for (auto thread : suggested_queue[core]) {
s32 this_core = thread->GetProcessorID();
Thread* thread_on_core = nullptr;
if (this_core >= 0) {
thread_on_core = scheduled_queue[this_core].front();
}
if (this_core < 0 || thread != thread_on_core) {
winner = thread;
break;
}
sug_cores.insert(this_core);
}
// if we got a suggested thread, select it, else do a second pass.
if (winner && winner->GetPriority() > 2) {
if (winner->IsRunning()) {
UnloadThread(winner->GetProcessorID());
}
TransferToCore(winner->GetPriority(), core, winner);
update_thread(winner, sched);
return;
}
// Step 3: Select a suggested thread from another core
for (auto& src_core : sug_cores) {
auto it = scheduled_queue[src_core].begin();
it++;
if (it != scheduled_queue[src_core].end()) {
Thread* thread_on_core = scheduled_queue[src_core].front();
Thread* to_change = *it;
if (thread_on_core->IsRunning() || to_change->IsRunning()) {
UnloadThread(src_core);
}
TransferToCore(thread_on_core->GetPriority(), core, thread_on_core);
current_thread = thread_on_core;
break;
}
}
update_thread(current_thread, sched);
}
/*
* YieldThread takes a thread and moves it to the back of its priority list.
* This operation can be redundant and no scheduling is changed if marked as so.
*/
void GlobalScheduler::YieldThread(Thread* yielding_thread) {
// Note: caller should use critical section, etc.
const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
const u32 priority = yielding_thread->GetPriority();
// Yield the thread
ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority),
"Thread yielding without being in front");
scheduled_queue[core_id].yield(priority);
Thread* winner = scheduled_queue[core_id].front(priority);
AskForReselectionOrMarkRedundant(yielding_thread, winner);
}
/*
* YieldThreadAndBalanceLoad takes a thread and moves it to the back of its priority list.
* Afterwards, tries to pick a suggested thread from the suggested queue that has worse time or
* a better priority than the next thread in the core.
* This operation can be redundant and no scheduling is changed if marked as so.
*/
void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
// Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
// etc.
const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
const u32 priority = yielding_thread->GetPriority();
// Yield the thread
ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority),
"Thread yielding without being in front");
scheduled_queue[core_id].yield(priority);
std::array<Thread*, NUM_CPU_CORES> current_threads;
for (u32 i = 0; i < NUM_CPU_CORES; i++) {
current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
}
Thread* next_thread = scheduled_queue[core_id].front(priority);
Thread* winner = nullptr;
for (auto& thread : suggested_queue[core_id]) {
const s32 source_core = thread->GetProcessorID();
if (source_core >= 0) {
if (current_threads[source_core] != nullptr) {
if (thread == current_threads[source_core] ||
current_threads[source_core]->GetPriority() < min_regular_priority) {
continue;
}
}
if (next_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks() ||
next_thread->GetPriority() < thread->GetPriority()) {
if (thread->GetPriority() <= priority) {
winner = thread;
break;
}
}
}
}
if (winner != nullptr) {
if (winner != yielding_thread) {
if (winner->IsRunning()) {
UnloadThread(winner->GetProcessorID());
}
TransferToCore(winner->GetPriority(), core_id, winner);
}
} else {
winner = next_thread;
}
AskForReselectionOrMarkRedundant(yielding_thread, winner);
}
/*
* YieldThreadAndWaitForLoadBalancing takes a thread and moves it out of the scheduling queue
* and into the suggested queue. If no thread can be scheduled afterwards in that core,
* a suggested thread is obtained instead.
* This operation can be redundant and no scheduling is changed if marked as so.
*/
void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) {
// Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
// etc.
Thread* winner = nullptr;
const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
// Remove the thread from its scheduled mlq, put it on the corresponding "suggested" one instead
TransferToCore(yielding_thread->GetPriority(), -1, yielding_thread);
// If the core is idle, perform load balancing, excluding the threads that have just used this
// function...
if (scheduled_queue[core_id].empty()) {
// Here, "current_threads" is calculated after the ""yield"", unlike yield -1
std::array<Thread*, NUM_CPU_CORES> current_threads;
for (u32 i = 0; i < NUM_CPU_CORES; i++) {
current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
}
for (auto& thread : suggested_queue[core_id]) {
const s32 source_core = thread->GetProcessorID();
if (source_core < 0 || thread == current_threads[source_core]) {
continue;
}
if (current_threads[source_core] == nullptr ||
current_threads[source_core]->GetPriority() >= min_regular_priority) {
winner = thread;
}
break;
}
if (winner != nullptr) {
if (winner != yielding_thread) {
if (winner->IsRunning()) {
UnloadThread(winner->GetProcessorID());
}
TransferToCore(winner->GetPriority(), core_id, winner);
}
} else {
winner = yielding_thread;
}
}
AskForReselectionOrMarkRedundant(yielding_thread, winner);
}
void GlobalScheduler::Schedule(u32 priority, u32 core, Thread* thread) {
ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core.");
scheduled_queue[core].add(thread, priority);
}
void GlobalScheduler::SchedulePrepend(u32 priority, u32 core, Thread* thread) {
ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core.");
scheduled_queue[core].add(thread, priority, false);
}
void GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) {
if (current_thread == winner) {
// TODO(blinkhawk): manage redundant operations, this is not implemented.
// as its mostly an optimization.
// current_thread->SetRedundantSchedulerOperation();
} else {
reselection_pending.store(true, std::memory_order_release);
}
}
GlobalScheduler::~GlobalScheduler() = default;
Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, u32 core_id)
: system(system), cpu_core(cpu_core), core_id(core_id) {}
Scheduler::~Scheduler() = default;
bool Scheduler::HaveReadyThreads() const {
std::lock_guard lock{scheduler_mutex};
return !ready_queue.empty();
return system.GlobalScheduler().HaveReadyThreads(core_id);
}
Thread* Scheduler::GetCurrentThread() const {
return current_thread.get();
}
Thread* Scheduler::GetSelectedThread() const {
return selected_thread.get();
}
void Scheduler::SelectThreads() {
system.GlobalScheduler().SelectThread(core_id);
}
u64 Scheduler::GetLastContextSwitchTicks() const {
return last_context_switch_time;
}
Thread* Scheduler::PopNextReadyThread() {
Thread* next = nullptr;
Thread* thread = GetCurrentThread();
if (thread && thread->GetStatus() == ThreadStatus::Running) {
if (ready_queue.empty()) {
return thread;
}
// We have to do better than the current thread.
// This call returns null when that's not possible.
next = ready_queue.front();
if (next == nullptr || next->GetPriority() >= thread->GetPriority()) {
next = thread;
}
} else {
if (ready_queue.empty()) {
return nullptr;
}
next = ready_queue.front();
void Scheduler::TryDoContextSwitch() {
if (context_switch_pending) {
SwitchContext();
}
return next;
}
void Scheduler::SwitchContext(Thread* new_thread) {
Thread* previous_thread = GetCurrentThread();
void Scheduler::UnloadThread() {
Thread* const previous_thread = GetCurrentThread();
Process* const previous_process = system.Kernel().CurrentProcess();
UpdateLastContextSwitchTime(previous_thread, previous_process);
@@ -80,23 +306,52 @@ void Scheduler::SwitchContext(Thread* new_thread) {
if (previous_thread->GetStatus() == ThreadStatus::Running) {
// This is only the case when a reschedule is triggered without the current thread
// yielding execution (i.e. an event triggered, system core time-sliced, etc)
ready_queue.add(previous_thread, previous_thread->GetPriority(), false);
previous_thread->SetStatus(ThreadStatus::Ready);
}
previous_thread->SetIsRunning(false);
}
current_thread = nullptr;
}
void Scheduler::SwitchContext() {
Thread* const previous_thread = GetCurrentThread();
Thread* const new_thread = GetSelectedThread();
context_switch_pending = false;
if (new_thread == previous_thread) {
return;
}
Process* const previous_process = system.Kernel().CurrentProcess();
UpdateLastContextSwitchTime(previous_thread, previous_process);
// Save context for previous thread
if (previous_thread) {
cpu_core.SaveContext(previous_thread->GetContext());
// Save the TPIDR_EL0 system register in case it was modified.
previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
if (previous_thread->GetStatus() == ThreadStatus::Running) {
// This is only the case when a reschedule is triggered without the current thread
// yielding execution (i.e. an event triggered, system core time-sliced, etc)
previous_thread->SetStatus(ThreadStatus::Ready);
}
previous_thread->SetIsRunning(false);
}
// Load context of new thread
if (new_thread) {
ASSERT_MSG(new_thread->GetProcessorID() == this->core_id,
"Thread must be assigned to this core.");
ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready,
"Thread must be ready to become running.");
// Cancel any outstanding wakeup events for this thread
new_thread->CancelWakeupTimer();
current_thread = new_thread;
ready_queue.remove(new_thread, new_thread->GetPriority());
new_thread->SetStatus(ThreadStatus::Running);
new_thread->SetIsRunning(true);
auto* const thread_owner_process = current_thread->GetOwnerProcess();
if (previous_process != thread_owner_process) {
@@ -130,124 +385,4 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
last_context_switch_time = most_recent_switch_ticks;
}
void Scheduler::Reschedule() {
std::lock_guard lock{scheduler_mutex};
Thread* cur = GetCurrentThread();
Thread* next = PopNextReadyThread();
if (cur && next) {
LOG_TRACE(Kernel, "context switch {} -> {}", cur->GetObjectId(), next->GetObjectId());
} else if (cur) {
LOG_TRACE(Kernel, "context switch {} -> idle", cur->GetObjectId());
} else if (next) {
LOG_TRACE(Kernel, "context switch idle -> {}", next->GetObjectId());
}
SwitchContext(next);
}
void Scheduler::AddThread(SharedPtr<Thread> thread) {
std::lock_guard lock{scheduler_mutex};
thread_list.push_back(std::move(thread));
}
void Scheduler::RemoveThread(Thread* thread) {
std::lock_guard lock{scheduler_mutex};
thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
thread_list.end());
}
void Scheduler::ScheduleThread(Thread* thread, u32 priority) {
std::lock_guard lock{scheduler_mutex};
ASSERT(thread->GetStatus() == ThreadStatus::Ready);
ready_queue.add(thread, priority);
}
void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
std::lock_guard lock{scheduler_mutex};
ASSERT(thread->GetStatus() == ThreadStatus::Ready);
ready_queue.remove(thread, priority);
}
void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
std::lock_guard lock{scheduler_mutex};
if (thread->GetPriority() == priority) {
return;
}
// If thread was ready, adjust queues
if (thread->GetStatus() == ThreadStatus::Ready)
ready_queue.adjust(thread, thread->GetPriority(), priority);
}
Thread* Scheduler::GetNextSuggestedThread(u32 core, u32 maximum_priority) const {
std::lock_guard lock{scheduler_mutex};
const u32 mask = 1U << core;
for (auto* thread : ready_queue) {
if ((thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority) {
return thread;
}
}
return nullptr;
}
void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
ASSERT(thread != nullptr);
// Avoid yielding if the thread isn't even running.
ASSERT(thread->GetStatus() == ThreadStatus::Running);
// Sanity check that the priority is valid
ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
// Yield this thread -- sleep for zero time and force reschedule to different thread
GetCurrentThread()->Sleep(0);
}
void Scheduler::YieldWithLoadBalancing(Thread* thread) {
ASSERT(thread != nullptr);
const auto priority = thread->GetPriority();
const auto core = static_cast<u32>(thread->GetProcessorID());
// Avoid yielding if the thread isn't even running.
ASSERT(thread->GetStatus() == ThreadStatus::Running);
// Sanity check that the priority is valid
ASSERT(priority < THREADPRIO_COUNT);
// Sleep for zero time to be able to force reschedule to different thread
GetCurrentThread()->Sleep(0);
Thread* suggested_thread = nullptr;
// Search through all of the cpu cores (except this one) for a suggested thread.
// Take the first non-nullptr one
for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
const auto res =
system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);
// If scheduler provides a suggested thread
if (res != nullptr) {
// And its better than the current suggested thread (or is the first valid one)
if (suggested_thread == nullptr ||
suggested_thread->GetPriority() > res->GetPriority()) {
suggested_thread = res;
}
}
}
// If a suggested thread was found, queue that for this core
if (suggested_thread != nullptr)
suggested_thread->ChangeCore(core, suggested_thread->GetAffinityMask());
}
void Scheduler::YieldAndWaitForLoadBalancing(Thread* thread) {
UNIMPLEMENTED_MSG("Wait for load balancing thread yield type is not implemented!");
}
} // namespace Kernel
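Taken together with the core_cpu.cpp changes earlier in this compare, rescheduling a core is now split into a global selection step and a local context switch; a condensed sketch of the per-core sequence (assembled from the hunks above, not verbatim from any one function):

    // Run on the core that was asked to reschedule:
    global_scheduler.SelectThread(core_index); // updates this core's selected_thread
    scheduler->TryDoContextSwitch();           // switches only if selected != current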

View File

@@ -20,124 +20,178 @@ namespace Kernel {
class Process;
class Scheduler final {
class GlobalScheduler final {
public:
explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
~Scheduler();
/// Returns whether there are any threads that are ready to run.
bool HaveReadyThreads() const;
/// Reschedules to the next available thread (call after current thread is suspended)
void Reschedule();
/// Gets the current running thread
Thread* GetCurrentThread() const;
/// Gets the timestamp for the last context switch in ticks.
u64 GetLastContextSwitchTicks() const;
static constexpr u32 NUM_CPU_CORES = 4;
explicit GlobalScheduler(Core::System& system);
~GlobalScheduler();
/// Adds a new thread to the scheduler
void AddThread(SharedPtr<Thread> thread);
/// Removes a thread from the scheduler
void RemoveThread(Thread* thread);
/// Schedules a thread that has become "ready"
void ScheduleThread(Thread* thread, u32 priority);
/// Unschedules a thread that was already scheduled
void UnscheduleThread(Thread* thread, u32 priority);
/// Sets the priority of a thread in the scheduler
void SetThreadPriority(Thread* thread, u32 priority);
/// Gets the next suggested thread for load balancing
Thread* GetNextSuggestedThread(u32 core, u32 minimum_priority) const;
/**
* YieldWithoutLoadBalancing -- analogous to normal yield on a system
* Moves the thread to the end of the ready queue for its priority, and then reschedules the
* system to the new head of the queue.
*
* Example (Single Core -- but can be extrapolated to multi):
* ready_queue[prio=0]: ThreadA, ThreadB, ThreadC (->exec order->)
* Currently Running: ThreadR
*
* ThreadR calls YieldWithoutLoadBalancing
*
* ThreadR is moved to the end of ready_queue[prio=0]:
* ready_queue[prio=0]: ThreadA, ThreadB, ThreadC, ThreadR (->exec order->)
* Currently Running: Nothing
*
* System is rescheduled (ThreadA is popped off of queue):
* ready_queue[prio=0]: ThreadB, ThreadC, ThreadR (->exec order->)
* Currently Running: ThreadA
*
* If the queue is empty at time of call, no yielding occurs. This does not cross between cores
* or priorities at all.
*/
void YieldWithoutLoadBalancing(Thread* thread);
/**
* YieldWithLoadBalancing -- yield but with better selection of the new running thread
* Moves the current thread to the end of the ready queue for its priority, then selects a
* 'suggested thread' (a thread on a different core that could run on this core) from the
* scheduler, changes its core, and reschedules the current core to that thread.
*
* Example (Dual Core -- can be extrapolated to Quad Core, this is just normal yield if it were
* single core):
* ready_queue[core=0][prio=0]: ThreadA, ThreadB (affinities not pictured as irrelevant
* ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only]
* Currently Running: ThreadQ on Core 0 || ThreadP on Core 1
*
* ThreadQ calls YieldWithLoadBalancing
*
* ThreadQ is moved to the end of ready_queue[core=0][prio=0]:
* ready_queue[core=0][prio=0]: ThreadA, ThreadB
* ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only]
* Currently Running: ThreadQ on Core 0 || ThreadP on Core 1
*
* A list of suggested threads for each core is compiled
* Suggested Threads: {ThreadC on Core 1}
* If this were quad core (as the switch is), there could be between 0 and 3 threads in this
* list. If there are more than one, the thread is selected by highest prio.
*
* ThreadC is core changed to Core 0:
* ready_queue[core=0][prio=0]: ThreadC, ThreadA, ThreadB, ThreadQ
* ready_queue[core=1][prio=0]: ThreadD
* Currently Running: None on Core 0 || ThreadP on Core 1
*
* System is rescheduled (ThreadC is popped off of queue):
* ready_queue[core=0][prio=0]: ThreadA, ThreadB, ThreadQ
* ready_queue[core=1][prio=0]: ThreadD
* Currently Running: ThreadC on Core 0 || ThreadP on Core 1
*
* If no suggested threads can be found this will behave just as normal yield. If there are
* multiple candidates for the suggested thread on a core, the highest prio is taken.
*/
void YieldWithLoadBalancing(Thread* thread);
/// Currently unknown -- asserts as unimplemented on call
void YieldAndWaitForLoadBalancing(Thread* thread);
void RemoveThread(const Thread* thread);
/// Returns a list of all threads managed by the scheduler
const std::vector<SharedPtr<Thread>>& GetThreadList() const {
return thread_list;
}
private:
/**
* Pops and returns the next thread from the thread queue
* @return A pointer to the next ready thread
*/
Thread* PopNextReadyThread();
// Add a thread to the suggested queue of a cpu core. Suggested threads may be
// picked if no thread is scheduled to run on the core.
void Suggest(u32 priority, u32 core, Thread* thread) {
suggested_queue[core].add(thread, priority);
}
// Remove a thread from the suggested queue of a cpu core. Suggested threads may be
// picked if no thread is scheduled to run on the core.
void Unsuggest(u32 priority, u32 core, Thread* thread) {
suggested_queue[core].remove(thread, priority);
}
// Add a thread to the scheduling queue of a cpu core. The thread is added at the
// back of the queue in its priority level
void Schedule(u32 priority, u32 core, Thread* thread);
// Add a thread to the scheduling queue of a cpu core. The thread is added at the
// front the queue in its priority level
void SchedulePrepend(u32 priority, u32 core, Thread* thread);
// Reschedule an already scheduled thread based on a new priority
void Reschedule(u32 priority, u32 core, Thread* thread) {
scheduled_queue[core].remove(thread, priority);
scheduled_queue[core].add(thread, priority);
}
// Unschedule a thread.
void Unschedule(u32 priority, u32 core, Thread* thread) {
scheduled_queue[core].remove(thread, priority);
}
// Transfers a thread into a specific core. If the destination_core is -1,
// it will be unscheduled from its source core and added into its suggested
// queue.
void TransferToCore(u32 priority, s32 destination_core, Thread* thread) {
const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT;
const s32 source_core = thread->GetProcessorID();
if (source_core == destination_core || !schedulable) {
return;
}
thread->SetProcessorID(destination_core);
if (source_core >= 0) {
Unschedule(priority, source_core, thread);
}
if (destination_core >= 0) {
Unsuggest(priority, destination_core, thread);
Schedule(priority, destination_core, thread);
}
if (source_core >= 0) {
Suggest(priority, source_core, thread);
}
}
/*
* UnloadThread selects a core and forces it to unload its current thread's context
*/
void UnloadThread(s32 core);
/*
* SelectThread takes care of selecting the new scheduled thread.
* It does it in 3 steps:
* - First, a thread is selected from the top of the priority queue. If no thread
* is obtained, we move on to step two; otherwise we are done.
* - Second, we try to get a suggested thread that's not assigned to any core or
* that is not the top thread in that core.
* - Third, if no suggested thread is found, we do a second pass and pick a running
* thread in another core and swap it with its current thread.
*/
void SelectThread(u32 core);
bool HaveReadyThreads(u32 core_id) const {
return !scheduled_queue[core_id].empty();
}
/*
* YieldThread takes a thread and moves it to the back of its priority list.
* This operation can be redundant and no scheduling is changed if marked as so.
*/
void YieldThread(Thread* thread);
/*
* YieldThreadAndBalanceLoad takes a thread and moves it to the back of its priority list.
* Afterwards, tries to pick a suggested thread from the suggested queue that has worse time or
* a better priority than the next thread in the core.
* This operation can be redundant and no scheduling is changed if marked as so.
*/
void YieldThreadAndBalanceLoad(Thread* thread);
/*
* YieldThreadAndWaitForLoadBalancing takes a thread and moves it out of the scheduling queue
* and into the suggested queue. If no thread can be scheduled afterwards in that core,
* a suggested thread is obtained instead.
* This operation can be redundant and no scheduling is changed if marked as so.
*/
void YieldThreadAndWaitForLoadBalancing(Thread* thread);
u32 CpuCoresCount() const {
return NUM_CPU_CORES;
}
void SetReselectionPending() {
reselection_pending.store(true, std::memory_order_release);
}
bool IsReselectionPending() const {
return reselection_pending.load();
}
private:
void AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner);
static constexpr u32 min_regular_priority = 2;
std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> scheduled_queue;
std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> suggested_queue;
std::atomic<bool> reselection_pending;
/// Lists all thread ids that aren't deleted/etc.
std::vector<SharedPtr<Thread>> thread_list;
Core::System& system;
};
class Scheduler final {
public:
explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, const u32 core_id);
~Scheduler();
/// Returns whether there are any threads that are ready to run.
bool HaveReadyThreads() const;
/// Reschedules to the next available thread (call after current thread is suspended)
void TryDoContextSwitch();
void UnloadThread();
void SelectThreads();
/// Gets the current running thread
Thread* GetCurrentThread() const;
Thread* GetSelectedThread() const;
/// Gets the timestamp for the last context switch in ticks.
u64 GetLastContextSwitchTicks() const;
bool ContextSwitchPending() const {
return context_switch_pending;
}
private:
friend class GlobalScheduler;
/**
* Switches the CPU's active thread context to that of the specified thread
* @param new_thread The thread to switch to
*/
void SwitchContext(Thread* new_thread);
void SwitchContext();
/**
* Called on every context switch to update the internal timestamp
@@ -152,19 +206,16 @@ private:
*/
void UpdateLastContextSwitchTime(Thread* thread, Process* process);
/// Lists all thread ids that aren't deleted/etc.
std::vector<SharedPtr<Thread>> thread_list;
/// Lists only ready thread ids.
Common::MultiLevelQueue<Thread*, THREADPRIO_LOWEST + 1> ready_queue;
SharedPtr<Thread> current_thread = nullptr;
Core::ARM_Interface& cpu_core;
u64 last_context_switch_time = 0;
SharedPtr<Thread> selected_thread = nullptr;
Core::System& system;
static std::mutex scheduler_mutex;
Core::ARM_Interface& cpu_core;
u64 last_context_switch_time = 0;
u64 idle_selection_count = 0;
const u32 core_id;
bool context_switch_pending = false;
};
} // namespace Kernel
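Both scheduled_queue and suggested_queue are Common::MultiLevelQueue instances with one level per priority; the GlobalScheduler code relies on a handful of its operations. A usage sketch with hypothetical threads, where the exact template API is inferred from the call sites above (in particular, the trailing false in add() is assumed to mean "push at the front", matching the SchedulePrepend comment):

    Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT> queue;
    queue.add(thread_a, 44);          // append at the back of priority level 44
    queue.add(thread_b, 44, false);   // push at the front of level 44 (assumed meaning)
    Thread* top = queue.front();      // front of the highest non-empty priority level
    Thread* at44 = queue.front(44);   // front of one specific priority level
    queue.yield(44);                  // rotate the front of level 44 to its back
    queue.remove(thread_a, 44);       // unlink from level 44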

View File

@@ -516,7 +516,7 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr
thread->WakeAfterDelay(nano_seconds);
thread->SetWakeupCallback(DefaultThreadWakeupCallback);
system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
system.PrepareReschedule(thread->GetProcessorID());
return RESULT_TIMEOUT;
}
@@ -534,6 +534,7 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand
}
thread->CancelWait();
system.PrepareReschedule(thread->GetProcessorID());
return RESULT_SUCCESS;
}
@@ -577,7 +578,8 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {
}
auto* const current_process = system.Kernel().CurrentProcess();
return current_process->GetMutex().Release(mutex_addr);
return current_process->GetMutex().Release(mutex_addr,
system.CurrentScheduler().GetCurrentThread());
}
enum class BreakType : u32 {
@@ -1066,6 +1068,8 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 act
}
thread->SetActivity(static_cast<ThreadActivity>(activity));
system.PrepareReschedule(thread->GetProcessorID());
return RESULT_SUCCESS;
}
@@ -1147,7 +1151,7 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri
thread->SetPriority(priority);
system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
system.PrepareReschedule(thread->GetProcessorID());
return RESULT_SUCCESS;
}
@@ -1503,7 +1507,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
thread->SetName(
fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle));
system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
system.PrepareReschedule(thread->GetProcessorID());
return RESULT_SUCCESS;
}
@@ -1525,7 +1529,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {
thread->ResumeFromWait();
if (thread->GetStatus() == ThreadStatus::Ready) {
system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
system.PrepareReschedule(thread->GetProcessorID());
}
return RESULT_SUCCESS;
@@ -1537,7 +1541,7 @@ static void ExitThread(Core::System& system) {
auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->Stop();
system.CurrentScheduler().RemoveThread(current_thread);
system.GlobalScheduler().RemoveThread(current_thread);
system.PrepareReschedule();
}
@@ -1557,13 +1561,13 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
if (nanoseconds <= 0) {
switch (static_cast<SleepType>(nanoseconds)) {
case SleepType::YieldWithoutLoadBalancing:
scheduler.YieldWithoutLoadBalancing(current_thread);
current_thread->YieldSimple();
break;
case SleepType::YieldWithLoadBalancing:
scheduler.YieldWithLoadBalancing(current_thread);
current_thread->YieldAndBalanceLoad();
break;
case SleepType::YieldAndWaitForLoadBalancing:
scheduler.YieldAndWaitForLoadBalancing(current_thread);
current_thread->YieldAndWaitForLoadBalancing();
break;
default:
UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
@@ -1572,10 +1576,7 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
current_thread->Sleep(nanoseconds);
}
// Reschedule all CPU cores
for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) {
system.CpuCore(i).PrepareReschedule();
}
system.PrepareReschedule(current_thread->GetProcessorID());
}
/// Wait process wide key atomic
@@ -1606,12 +1607,14 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
ASSERT(thread);
const auto release_result = current_process->GetMutex().Release(mutex_addr);
SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
const auto release_result =
current_process->GetMutex().Release(mutex_addr, current_thread.get());
if (release_result.IsError()) {
return release_result;
}
SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->SetCondVarWaitAddress(condition_variable_addr);
current_thread->SetMutexWaitAddress(mutex_addr);
current_thread->SetWaitHandle(thread_handle);
@@ -1622,7 +1625,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
// Note: Deliberately don't attempt to inherit the lock owner's priority.
system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
system.PrepareReschedule(current_thread->GetProcessorID());
return RESULT_SUCCESS;
}
@@ -1632,24 +1635,17 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
condition_variable_addr, target);
const auto RetrieveWaitingThreads = [&system](std::size_t core_index,
std::vector<SharedPtr<Thread>>& waiting_threads,
VAddr condvar_addr) {
const auto& scheduler = system.Scheduler(core_index);
const auto& thread_list = scheduler.GetThreadList();
for (const auto& thread : thread_list) {
if (thread->GetCondVarWaitAddress() == condvar_addr)
waiting_threads.push_back(thread);
}
};
// Retrieve a list of all threads that are waiting for this condition variable.
std::vector<SharedPtr<Thread>> waiting_threads;
RetrieveWaitingThreads(0, waiting_threads, condition_variable_addr);
RetrieveWaitingThreads(1, waiting_threads, condition_variable_addr);
RetrieveWaitingThreads(2, waiting_threads, condition_variable_addr);
RetrieveWaitingThreads(3, waiting_threads, condition_variable_addr);
const auto& scheduler = system.GlobalScheduler();
const auto& thread_list = scheduler.GetThreadList();
for (const auto& thread : thread_list) {
if (thread->GetCondVarWaitAddress() == condition_variable_addr) {
waiting_threads.push_back(thread);
}
}
// Sort them by priority, such that the highest priority ones come first.
std::sort(waiting_threads.begin(), waiting_threads.end(),
[](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
@@ -1704,7 +1700,7 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
thread->SetLockOwner(nullptr);
thread->SetMutexWaitAddress(0);
thread->SetWaitHandle(0);
system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
system.PrepareReschedule(thread->GetProcessorID());
} else {
// Atomically signal that the mutex now has a waiting thread.
do {
@@ -1728,6 +1724,7 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
thread->SetStatus(ThreadStatus::WaitMutex);
owner->AddMutexWaiter(thread);
system.PrepareReschedule(thread->GetProcessorID());
}
}
@@ -1754,7 +1751,12 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type);
auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
const ResultCode result =
address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
if (result == RESULT_SUCCESS) {
system.PrepareReschedule();
}
return result;
}
// Signals to an address (via Address Arbiter)
@@ -2040,7 +2042,10 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
return ERR_INVALID_HANDLE;
}
system.PrepareReschedule(thread->GetProcessorID());
thread->ChangeCore(core, affinity_mask);
system.PrepareReschedule(thread->GetProcessorID());
return RESULT_SUCCESS;
}
@@ -2151,6 +2156,7 @@ static ResultCode SignalEvent(Core::System& system, Handle handle) {
}
writable_event->Signal();
system.PrepareReschedule();
return RESULT_SUCCESS;
}
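For context on the SleepThread hunk above: the non-positive nanosecond values select one of the three yield variants. The sketch below assumes the SleepType enum defined earlier in svc.cpp, with values following the Horizon svcSleepThread convention (they are not visible in this diff):

    enum class SleepType : s64 {
        YieldWithoutLoadBalancing = 0,     // -> current_thread->YieldSimple()
        YieldWithLoadBalancing = -1,       // -> current_thread->YieldAndBalanceLoad()
        YieldAndWaitForLoadBalancing = -2, // -> current_thread->YieldAndWaitForLoadBalancing()
    };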

View File

@@ -45,15 +45,7 @@ void Thread::Stop() {
callback_handle);
kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
callback_handle = 0;
// Clean up thread from ready queue
// This is only needed when the thread is terminated forcefully (SVC TerminateProcess)
if (status == ThreadStatus::Ready || status == ThreadStatus::Paused) {
scheduler->UnscheduleThread(this, current_priority);
}
status = ThreadStatus::Dead;
SetStatus(ThreadStatus::Dead);
WakeupAllWaitingThreads();
// Clean up any dangling references in objects that this thread was waiting for
@@ -132,13 +124,11 @@ void Thread::ResumeFromWait() {
wakeup_callback = nullptr;
if (activity == ThreadActivity::Paused) {
status = ThreadStatus::Paused;
SetStatus(ThreadStatus::Paused);
return;
}
status = ThreadStatus::Ready;
ChangeScheduler();
SetStatus(ThreadStatus::Ready);
}
void Thread::CancelWait() {
@@ -205,9 +195,9 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
thread->name = std::move(name);
thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
thread->owner_process = &owner_process;
auto& scheduler = kernel.GlobalScheduler();
scheduler.AddThread(thread);
thread->tls_address = thread->owner_process->CreateTLSRegion();
thread->scheduler = &system.Scheduler(processor_id);
thread->scheduler->AddThread(thread);
thread->owner_process->RegisterThread(thread.get());
@@ -250,6 +240,22 @@ void Thread::SetStatus(ThreadStatus new_status) {
return;
}
switch (new_status) {
case ThreadStatus::Ready:
case ThreadStatus::Running:
SetSchedulingStatus(ThreadSchedStatus::Runnable);
break;
case ThreadStatus::Dormant:
SetSchedulingStatus(ThreadSchedStatus::None);
break;
case ThreadStatus::Dead:
SetSchedulingStatus(ThreadSchedStatus::Exited);
break;
default:
SetSchedulingStatus(ThreadSchedStatus::Paused);
break;
}
if (status == ThreadStatus::Running) {
last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
}
@@ -311,8 +317,7 @@ void Thread::UpdatePriority() {
return;
}
scheduler->SetThreadPriority(this, new_priority);
current_priority = new_priority;
SetCurrentPriority(new_priority);
if (!lock_owner) {
return;
@@ -328,47 +333,7 @@ void Thread::UpdatePriority() {
}
void Thread::ChangeCore(u32 core, u64 mask) {
ideal_core = core;
affinity_mask = mask;
ChangeScheduler();
}
void Thread::ChangeScheduler() {
if (status != ThreadStatus::Ready) {
return;
}
auto& system = Core::System::GetInstance();
std::optional<s32> new_processor_id{GetNextProcessorId(affinity_mask)};
if (!new_processor_id) {
new_processor_id = processor_id;
}
if (ideal_core != -1 && system.Scheduler(ideal_core).GetCurrentThread() == nullptr) {
new_processor_id = ideal_core;
}
ASSERT(*new_processor_id < 4);
// Add thread to new core's scheduler
auto& next_scheduler = system.Scheduler(*new_processor_id);
if (*new_processor_id != processor_id) {
// Remove thread from previous core's scheduler
scheduler->RemoveThread(this);
next_scheduler.AddThread(this);
}
processor_id = *new_processor_id;
// If the thread was ready, unschedule from the previous core and schedule on the new core
scheduler->UnscheduleThread(this, current_priority);
next_scheduler.ScheduleThread(this, current_priority);
// Change thread's scheduler
scheduler = &next_scheduler;
system.CpuCore(processor_id).PrepareReschedule();
SetCoreAndAffinityMask(core, mask);
}
bool Thread::AllWaitObjectsReady() const {
@@ -391,7 +356,7 @@ void Thread::SetActivity(ThreadActivity value) {
if (status == ThreadStatus::Ready) {
status = ThreadStatus::Paused;
} else if (status == ThreadStatus::Running) {
status = ThreadStatus::Paused;
SetStatus(ThreadStatus::Paused);
Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule();
}
} else if (status == ThreadStatus::Paused) {
@@ -408,6 +373,171 @@ void Thread::Sleep(s64 nanoseconds) {
WakeAfterDelay(nanoseconds);
}
void Thread::YieldSimple() {
auto& scheduler = kernel.GlobalScheduler();
scheduler.YieldThread(this);
}
void Thread::YieldAndBalanceLoad() {
auto& scheduler = kernel.GlobalScheduler();
scheduler.YieldThreadAndBalanceLoad(this);
}
void Thread::YieldAndWaitForLoadBalancing() {
auto& scheduler = kernel.GlobalScheduler();
scheduler.YieldThreadAndWaitForLoadBalancing(this);
}
void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) {
const u32 old_flags = scheduling_state;
scheduling_state =
(scheduling_state & ThreadSchedMasks::HighMask) | static_cast<u32>(new_status);
AdjustSchedulingOnStatus(old_flags);
}
void Thread::SetCurrentPriority(u32 new_priority) {
u32 old_priority = std::exchange(current_priority, new_priority);
AdjustSchedulingOnPriority(old_priority);
}
ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
const auto HighestSetCore = [](u64 mask, u32 max_cores) {
for (s32 core = max_cores - 1; core >= 0; core--) {
if (((mask >> core) & 1) != 0) {
return core;
}
}
return -1;
};
const bool use_override = affinity_override_count != 0;
// The value -3 is "do not change the ideal core".
if (new_core == -3) {
new_core = use_override ? ideal_core_override : ideal_core;
if ((new_affinity_mask & (1 << new_core)) == 0) {
return ERR_INVALID_COMBINATION;
}
}
if (use_override) {
ideal_core_override = new_core;
affinity_mask_override = new_affinity_mask;
} else {
const u64 old_affinity_mask = std::exchange(affinity_mask, new_affinity_mask);
ideal_core = new_core;
if (old_affinity_mask != new_affinity_mask) {
const s32 old_core = processor_id;
if (processor_id >= 0 && ((affinity_mask >> processor_id) & 1) == 0) {
if (ideal_core < 0) {
processor_id = HighestSetCore(affinity_mask, GlobalScheduler::NUM_CPU_CORES);
} else {
processor_id = ideal_core;
}
}
AdjustSchedulingOnAffinity(old_affinity_mask, old_core);
}
}
return RESULT_SUCCESS;
}
void Thread::AdjustSchedulingOnStatus(u32 old_flags) {
if (old_flags == scheduling_state) {
return;
}
auto& scheduler = kernel.GlobalScheduler();
if (static_cast<ThreadSchedStatus>(old_flags & ThreadSchedMasks::LowMask) ==
ThreadSchedStatus::Runnable) {
// In this case the thread was running, now it's pausing/exiting
if (processor_id >= 0) {
scheduler.Unschedule(current_priority, processor_id, this);
}
for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
if (core != processor_id && ((affinity_mask >> core) & 1) != 0) {
scheduler.Unsuggest(current_priority, core, this);
}
}
} else if (GetSchedulingStatus() == ThreadSchedStatus::Runnable) {
// The thread is now set to running from being stopped
if (processor_id >= 0) {
scheduler.Schedule(current_priority, processor_id, this);
}
for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
if (core != processor_id && ((affinity_mask >> core) & 1) != 0) {
scheduler.Suggest(current_priority, core, this);
}
}
}
scheduler.SetReselectionPending();
}
void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
return;
}
auto& scheduler = Core::System::GetInstance().GlobalScheduler();
if (processor_id >= 0) {
scheduler.Unschedule(old_priority, processor_id, this);
}
for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
if (core != processor_id && ((affinity_mask >> core) & 1) != 0) {
scheduler.Unsuggest(old_priority, core, this);
}
}
// Add thread to the new priority queues.
Thread* current_thread = GetCurrentThread();
if (processor_id >= 0) {
if (current_thread == this) {
scheduler.SchedulePrepend(current_priority, processor_id, this);
} else {
scheduler.Schedule(current_priority, processor_id, this);
}
}
for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
if (core != processor_id && ((affinity_mask >> core) & 1) != 0) {
scheduler.Suggest(current_priority, core, this);
}
}
scheduler.SetReselectionPending();
}
void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) {
auto& scheduler = Core::System::GetInstance().GlobalScheduler();
if (GetSchedulingStatus() != ThreadSchedStatus::Runnable ||
current_priority >= THREADPRIO_COUNT) {
return;
}
for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
if (((old_affinity_mask >> core) & 1) != 0) {
if (core == old_core) {
scheduler.Unschedule(current_priority, core, this);
} else {
scheduler.Unsuggest(current_priority, core, this);
}
}
}
for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
if (((affinity_mask >> core) & 1) != 0) {
if (core == processor_id) {
scheduler.Schedule(current_priority, core, this);
} else {
scheduler.Suggest(current_priority, core, this);
}
}
}
scheduler.SetReselectionPending();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/**

View File

@@ -75,6 +75,26 @@ enum class ThreadActivity : u32 {
Paused = 1,
};
enum class ThreadSchedStatus : u32 {
None = 0,
Paused = 1,
Runnable = 2,
Exited = 3,
};
enum ThreadSchedFlags : u32 {
ProcessPauseFlag = 1 << 4,
ThreadPauseFlag = 1 << 5,
ProcessDebugPauseFlag = 1 << 6,
KernelInitPauseFlag = 1 << 8,
};
enum ThreadSchedMasks : u32 {
LowMask = 0x000f,
HighMask = 0xfff0,
ForcePauseMask = 0x0070,
};
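A minimal sketch (values assumed for illustration) of how these masks partition scheduling_state, consistent with SetSchedulingStatus and GetSchedulingStatus:

#include <cassert>
#include <cstdint>

int main() {
    std::uint32_t scheduling_state = 0;
    // SetSchedulingStatus: keep the pause flags, replace the status nibble.
    const auto set_status = [&](std::uint32_t status) {
        scheduling_state = (scheduling_state & 0xfff0u /*HighMask*/) | status;
    };
    set_status(2 /*Runnable*/);
    scheduling_state |= (1u << 5) /*ThreadPauseFlag*/;
    // GetSchedulingStatus reads only the low nibble, so the pause flag does
    // not change the reported status.
    assert((scheduling_state & 0x000fu /*LowMask*/) == 2);
    // ForcePauseMask covers the process/thread/debug pause bits.
    assert((scheduling_state & 0x0070u /*ForcePauseMask*/) != 0);
    return 0;
}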
class Thread final : public WaitObject {
public:
using MutexWaitingThreads = std::vector<SharedPtr<Thread>>;
@@ -278,6 +298,10 @@ public:
return processor_id;
}
void SetProcessorID(s32 new_core) {
processor_id = new_core;
}
Process* GetOwnerProcess() {
return owner_process;
}
@@ -383,11 +407,38 @@ public:
/// Sleeps this thread for the given amount of nanoseconds.
void Sleep(s64 nanoseconds);
/// Yields this thread without rebalancing loads.
void YieldSimple();
/// Yields this thread and does a load rebalancing.
void YieldAndBalanceLoad();
/// Yields this thread; if the core is left idle, loads are rebalanced.
void YieldAndWaitForLoadBalancing();
ThreadSchedStatus GetSchedulingStatus() const {
return static_cast<ThreadSchedStatus>(scheduling_state & ThreadSchedMasks::LowMask);
}
bool IsRunning() const {
return is_running;
}
void SetIsRunning(bool value) {
is_running = value;
}
private:
explicit Thread(KernelCore& kernel);
~Thread() override;
void ChangeScheduler();
void SetSchedulingStatus(ThreadSchedStatus new_status);
void SetCurrentPriority(u32 new_priority);
ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);
void AdjustSchedulingOnStatus(u32 old_flags);
void AdjustSchedulingOnPriority(u32 old_priority);
void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core);
Core::ARM_Interface::ThreadContext context{};
@@ -453,6 +504,13 @@ private:
ThreadActivity activity = ThreadActivity::Normal;
s32 ideal_core_override = -1;
u64 affinity_mask_override = 0x1;
u32 affinity_override_count = 0;
u32 scheduling_state = 0;
bool is_running = false;
std::string name;
};

View File

@@ -23,6 +23,8 @@ SharedPtr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr base_
transfer_memory->owner_permissions = permissions;
transfer_memory->owner_process = kernel.CurrentProcess();
transfer_memory->MapMemory(base_address, size, permissions);
return transfer_memory;
}

View File

@@ -6,6 +6,8 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/thread.h"
@@ -48,17 +50,8 @@ SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() const {
if (ShouldWait(thread.get()))
continue;
// A thread is ready to run if it's either in ThreadStatus::WaitSynch
// and the rest of the objects it is waiting on are ready.
bool ready_to_run = true;
if (thread_status == ThreadStatus::WaitSynch) {
ready_to_run = thread->AllWaitObjectsReady();
}
if (ready_to_run) {
candidate = thread.get();
candidate_priority = thread->GetPriority();
}
candidate = thread.get();
candidate_priority = thread->GetPriority();
}
return candidate;
@@ -95,6 +88,7 @@ void WaitObject::WakeupWaitingThread(SharedPtr<Thread> thread) {
}
if (resume) {
thread->ResumeFromWait();
Core::System::GetInstance().PrepareReschedule(thread->GetProcessorID());
}
}

View File

@@ -841,17 +841,16 @@ private:
void PopInteractiveOutData(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
const auto storage = applet->GetBroker().PopInteractiveDataToGame();
if (storage == nullptr) {
LOG_ERROR(Service_AM,
"storage is a nullptr. There is no data in the current interactive channel");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_NO_DATA_IN_CHANNEL);
return;
}
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<IStorage>(std::move(*storage));
}

View File

@@ -27,9 +27,9 @@ AppletDataBroker::AppletDataBroker(Kernel::KernelCore& kernel) {
state_changed_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent");
pop_out_data_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopDataOutEvent");
kernel, Kernel::ResetType::Automatic, "ILibraryAppletAccessor:PopDataOutEvent");
pop_interactive_out_data_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
kernel, Kernel::ResetType::Automatic, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
}
AppletDataBroker::~AppletDataBroker() = default;

View File

@@ -91,6 +91,7 @@ void SoftwareKeyboard::ExecuteInteractive() {
if (status == INTERACTIVE_STATUS_OK) {
complete = true;
broker.SignalStateChanged();
} else {
std::array<char16_t, SWKBD_OUTPUT_INTERACTIVE_BUFFER_SIZE / 2 - 2> string;
std::memcpy(string.data(), data.data() + 4, string.size() * 2);

View File

@@ -40,7 +40,10 @@ static FileSys::VirtualDir GetDirectoryRelativeWrapped(FileSys::VirtualDir base,
if (dir_name.empty() || dir_name == "." || dir_name == "/" || dir_name == "\\")
return base;
return base->GetDirectoryRelative(dir_name);
const auto res = base->GetDirectoryRelative(dir_name);
if (res == nullptr)
return base->CreateDirectoryRelative(dir_name);
return res;
}
VfsDirectoryServiceWrapper::VfsDirectoryServiceWrapper(FileSys::VirtualDir backing_)

View File

@@ -201,13 +201,13 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
{120, &Hid::SetNpadJoyHoldType, "SetNpadJoyHoldType"},
{121, &Hid::GetNpadJoyHoldType, "GetNpadJoyHoldType"},
{122, &Hid::SetNpadJoyAssignmentModeSingleByDefault, "SetNpadJoyAssignmentModeSingleByDefault"},
{123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"},
{123, &Hid::SetNpadJoyAssignmentModeSingle, "SetNpadJoyAssignmentModeSingle"},
{124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"},
{125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"},
{126, &Hid::StartLrAssignmentMode, "StartLrAssignmentMode"},
{127, &Hid::StopLrAssignmentMode, "StopLrAssignmentMode"},
{128, &Hid::SetNpadHandheldActivationMode, "SetNpadHandheldActivationMode"},
{129, nullptr, "GetNpadHandheldActivationMode"},
{129, &Hid::GetNpadHandheldActivationMode, "GetNpadHandheldActivationMode"},
{130, &Hid::SwapNpadAssignment, "SwapNpadAssignment"},
{131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"},
{132, nullptr, "EnableUnintendedHomeButtonInputProtection"},
@@ -544,10 +544,126 @@ void Hid::SetNpadJoyAssignmentModeSingleByDefault(Kernel::HLERequestContext& ctx
LOG_WARNING(Service_HID, "(STUBBED) called, npad_id={}, applet_resource_user_id={}", npad_id,
applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.SetNpadMode(npad_id, Controller_NPad::NPadAssignments::Single);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::SetNpadJoyAssignmentModeSingle(Kernel::HLERequestContext& ctx) {
// TODO: Check the differences between this and SetNpadJoyAssignmentModeSingleByDefault
IPC::RequestParser rp{ctx};
const auto npad_id{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
const auto npad_joy_device_type{rp.Pop<u64>()};
LOG_WARNING(Service_HID,
"(STUBBED) called, npad_id={}, applet_resource_user_id={}, npad_joy_device_type={}",
npad_id, applet_resource_user_id, npad_joy_device_type);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.SetNpadMode(npad_id, Controller_NPad::NPadAssignments::Single);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::SetNpadJoyAssignmentModeDual(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto npad_id{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, npad_id={}, applet_resource_user_id={}", npad_id,
applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.SetNpadMode(npad_id, Controller_NPad::NPadAssignments::Dual);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::MergeSingleJoyAsDualJoy(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto unknown_1{rp.Pop<u32>()};
const auto unknown_2{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_WARNING(Service_HID,
"(STUBBED) called, unknown_1={}, unknown_2={}, applet_resource_user_id={}",
unknown_1, unknown_2, applet_resource_user_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::StartLrAssignmentMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.StartLRAssignmentMode();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::StopLrAssignmentMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.StopLRAssignmentMode();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::SetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
const auto mode{rp.Pop<u64>()};
LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}, mode={}",
applet_resource_user_id, mode);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::GetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
applet_resource_user_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::SwapNpadAssignment(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto npad_1{rp.Pop<u32>()};
const auto npad_2{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}, npad_1={}, npad_2={}",
applet_resource_user_id, npad_1, npad_2);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
IPC::ResponseBuilder rb{ctx, 2};
if (controller.SwapNpadAssignment(npad_1, npad_2)) {
rb.Push(RESULT_SUCCESS);
} else {
LOG_ERROR(Service_HID, "Npads are not connected!");
rb.Push(ERR_NPAD_NOT_CONNECTED);
}
}
void Hid::BeginPermitVibrationSession(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
@@ -622,47 +738,6 @@ void Hid::GetActualVibrationValue(Kernel::HLERequestContext& ctx) {
applet_resource->GetController<Controller_NPad>(HidController::NPad).GetLastVibration());
}
void Hid::SetNpadJoyAssignmentModeDual(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto npad_id{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, npad_id={}, applet_resource_user_id={}", npad_id,
applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.SetNpadMode(npad_id, Controller_NPad::NPadAssignments::Dual);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::MergeSingleJoyAsDualJoy(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto unknown_1{rp.Pop<u32>()};
const auto unknown_2{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_WARNING(Service_HID,
"(STUBBED) called, unknown_1={}, unknown_2={}, applet_resource_user_id={}",
unknown_1, unknown_2, applet_resource_user_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::SetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
const auto mode{rp.Pop<u64>()};
LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}, mode={}",
applet_resource_user_id, mode);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_HID, "called");
@@ -756,49 +831,6 @@ void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
void Hid::StartLrAssignmentMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.StartLRAssignmentMode();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::StopLrAssignmentMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.StopLRAssignmentMode();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::SwapNpadAssignment(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto npad_1{rp.Pop<u32>()};
const auto npad_2{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}, npad_1={}, npad_2={}",
applet_resource_user_id, npad_1, npad_2);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
IPC::ResponseBuilder rb{ctx, 2};
if (controller.SwapNpadAssignment(npad_1, npad_2)) {
rb.Push(RESULT_SUCCESS);
} else {
LOG_ERROR(Service_HID, "Npads are not connected!");
rb.Push(ERR_NPAD_NOT_CONNECTED);
}
}
class HidDbg final : public ServiceFramework<HidDbg> {
public:
explicit HidDbg() : ServiceFramework{"hid:dbg"} {

View File

@@ -105,14 +105,19 @@ private:
void SetNpadJoyHoldType(Kernel::HLERequestContext& ctx);
void GetNpadJoyHoldType(Kernel::HLERequestContext& ctx);
void SetNpadJoyAssignmentModeSingleByDefault(Kernel::HLERequestContext& ctx);
void SetNpadJoyAssignmentModeSingle(Kernel::HLERequestContext& ctx);
void SetNpadJoyAssignmentModeDual(Kernel::HLERequestContext& ctx);
void MergeSingleJoyAsDualJoy(Kernel::HLERequestContext& ctx);
void StartLrAssignmentMode(Kernel::HLERequestContext& ctx);
void StopLrAssignmentMode(Kernel::HLERequestContext& ctx);
void SetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx);
void GetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx);
void SwapNpadAssignment(Kernel::HLERequestContext& ctx);
void BeginPermitVibrationSession(Kernel::HLERequestContext& ctx);
void EndPermitVibrationSession(Kernel::HLERequestContext& ctx);
void SendVibrationValue(Kernel::HLERequestContext& ctx);
void SendVibrationValues(Kernel::HLERequestContext& ctx);
void GetActualVibrationValue(Kernel::HLERequestContext& ctx);
void SetNpadJoyAssignmentModeDual(Kernel::HLERequestContext& ctx);
void MergeSingleJoyAsDualJoy(Kernel::HLERequestContext& ctx);
void SetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx);
void GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx);
void CreateActiveVibrationDeviceList(Kernel::HLERequestContext& ctx);
void PermitVibration(Kernel::HLERequestContext& ctx);
@@ -122,9 +127,6 @@ private:
void StopSixAxisSensor(Kernel::HLERequestContext& ctx);
void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx);
void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
void StartLrAssignmentMode(Kernel::HLERequestContext& ctx);
void StopLrAssignmentMode(Kernel::HLERequestContext& ctx);
void SwapNpadAssignment(Kernel::HLERequestContext& ctx);
std::shared_ptr<IAppletResource> applet_resource;
Core::System& system;

View File

@@ -10,6 +10,8 @@
#include "core/hle/service/lbl/lbl.h"
#include "core/hle/service/service.h"
#include "core/hle/service/sm/sm.h"
#include "core/settings.h"
#include "video_core/renderer_base.h"
namespace Service::LBL {
@@ -18,21 +20,21 @@ public:
explicit LBL() : ServiceFramework{"lbl"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "SaveCurrentSetting"},
{1, nullptr, "LoadCurrentSetting"},
{2, nullptr, "SetCurrentBrightnessSetting"},
{3, nullptr, "GetCurrentBrightnessSetting"},
{4, nullptr, "ApplyCurrentBrightnessSettingToBacklight"},
{5, nullptr, "GetBrightnessSettingAppliedToBacklight"},
{6, nullptr, "SwitchBacklightOn"},
{7, nullptr, "SwitchBacklightOff"},
{8, nullptr, "GetBacklightSwitchStatus"},
{9, nullptr, "EnableDimming"},
{10, nullptr, "DisableDimming"},
{11, nullptr, "IsDimmingEnabled"},
{12, nullptr, "EnableAutoBrightnessControl"},
{13, nullptr, "DisableAutoBrightnessControl"},
{14, nullptr, "IsAutoBrightnessControlEnabled"},
{0, &LBL::SaveCurrentSetting, "SaveCurrentSetting"},
{1, &LBL::LoadCurrentSetting, "LoadCurrentSetting"},
{2, &LBL::SetCurrentBrightnessSetting, "SetCurrentBrightnessSetting"},
{3, &LBL::GetCurrentBrightnessSetting, "GetCurrentBrightnessSetting"},
{4, &LBL::ApplyCurrentBrightnessSettingToBacklight, "ApplyCurrentBrightnessSettingToBacklight"},
{5, &LBL::GetBrightnessSettingAppliedToBacklight, "GetBrightnessSettingAppliedToBacklight"},
{6, &LBL::SwitchBacklightOn, "SwitchBacklightOn"},
{7, &LBL::SwitchBacklightOff, "SwitchBacklightOff"},
{8, &LBL::GetBacklightSwitchStatus, "GetBacklightSwitchStatus"},
{9, &LBL::EnableDimming, "EnableDimming"},
{10, &LBL::DisableDimming, "DisableDimming"},
{11, &LBL::IsDimmingEnabled, "IsDimmingEnabled"},
{12, &LBL::EnableAutoBrightnessControl, "EnableAutoBrightnessControl"},
{13, &LBL::DisableAutoBrightnessControl, "DisableAutoBrightnessControl"},
{14, &LBL::IsAutoBrightnessControlEnabled, "IsAutoBrightnessControlEnabled"},
{15, nullptr, "SetAmbientLightSensorValue"},
{16, nullptr, "GetAmbientLightSensorValue"},
{17, nullptr, "SetBrightnessReflectionDelayLevel"},
@@ -42,8 +44,8 @@ public:
{21, nullptr, "SetCurrentAmbientLightSensorMapping"},
{22, nullptr, "GetCurrentAmbientLightSensorMapping"},
{23, nullptr, "IsAmbientLightSensorAvailable"},
{24, nullptr, "SetCurrentBrightnessSettingForVrMode"},
{25, nullptr, "GetCurrentBrightnessSettingForVrMode"},
{24, &LBL::SetCurrentBrightnessSettingForVrMode, "SetCurrentBrightnessSettingForVrMode"},
{25, &LBL::GetCurrentBrightnessSettingForVrMode, "GetCurrentBrightnessSettingForVrMode"},
{26, &LBL::EnableVrMode, "EnableVrMode"},
{27, &LBL::DisableVrMode, "DisableVrMode"},
{28, &LBL::IsVrModeEnabled, "IsVrModeEnabled"},
@@ -53,13 +55,209 @@ public:
RegisterHandlers(functions);
}
void LoadFromSettings() {
current_brightness = Settings::values.backlight_brightness;
current_vr_mode_brightness = Settings::values.backlight_brightness;
if (auto_brightness_enabled) {
return;
}
if (vr_mode_enabled) {
Renderer().SetCurrentBrightness(current_vr_mode_brightness);
} else {
Renderer().SetCurrentBrightness(current_brightness);
}
}
private:
f32 GetAutoBrightnessValue() const {
return 0.5f;
}
VideoCore::RendererBase& Renderer() {
return Core::System::GetInstance().Renderer();
}
void SaveCurrentSetting(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
Settings::values.backlight_brightness = current_brightness;
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void LoadCurrentSetting(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
LoadFromSettings();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void SetCurrentBrightnessSetting(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto value = rp.PopRaw<f32>();
LOG_DEBUG(Service_LBL, "called, value={:.3f}", value);
current_brightness = std::clamp(value, 0.0f, 1.0f);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void GetCurrentBrightnessSetting(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(current_brightness);
}
void ApplyCurrentBrightnessSettingToBacklight(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
if (!auto_brightness_enabled) {
Renderer().SetCurrentBrightness(vr_mode_enabled ? current_vr_mode_brightness
: current_brightness);
}
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void GetBrightnessSettingAppliedToBacklight(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(Renderer().GetCurrentResultantBrightness());
}
void SwitchBacklightOn(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto fade_time = rp.PopRaw<u64>();
LOG_DEBUG(Service_LBL, "called, fade_time={:016X}", fade_time);
Renderer().SetBacklightStatus(true, fade_time);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void SwitchBacklightOff(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto fade_time = rp.PopRaw<u64>();
LOG_DEBUG(Service_LBL, "called, fade_time={:016X}", fade_time);
Renderer().SetBacklightStatus(false, fade_time);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void GetBacklightSwitchStatus(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(Renderer().GetBacklightStatus());
}
void EnableDimming(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
dimming_enabled = true;
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void DisableDimming(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "callled");
dimming_enabled = false;
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void IsDimmingEnabled(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(dimming_enabled);
}
void EnableAutoBrightnessControl(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
auto_brightness_enabled = true;
Renderer().SetCurrentBrightness(GetAutoBrightnessValue());
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void DisableAutoBrightnessControl(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
auto_brightness_enabled = false;
Renderer().SetCurrentBrightness(current_brightness);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void IsAutoBrightnessControlEnabled(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(auto_brightness_enabled);
}
void SetCurrentBrightnessSettingForVrMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto value = rp.PopRaw<f32>();
LOG_DEBUG(Service_LBL, "called, value={:.3f}", value);
current_vr_mode_brightness = std::clamp(value, 0.0f, 1.0f);
if (vr_mode_enabled && !auto_brightness_enabled) {
Renderer().SetCurrentBrightness(value);
}
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void GetCurrentBrightnessSettingForVrMode(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(current_vr_mode_brightness);
}
void EnableVrMode(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
if (!vr_mode_enabled && !auto_brightness_enabled &&
current_brightness != current_vr_mode_brightness) {
Renderer().SetCurrentBrightness(current_vr_mode_brightness);
}
vr_mode_enabled = true;
}
@@ -69,6 +267,11 @@ private:
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
if (vr_mode_enabled && !auto_brightness_enabled &&
current_brightness != current_vr_mode_brightness) {
Renderer().SetCurrentBrightness(current_brightness);
}
vr_mode_enabled = false;
}
@@ -80,9 +283,27 @@ private:
rb.Push(vr_mode_enabled);
}
bool auto_brightness_enabled = false;
bool dimming_enabled = true;
f32 current_brightness = GetAutoBrightnessValue();
f32 current_vr_mode_brightness = GetAutoBrightnessValue();
bool vr_mode_enabled = false;
};
void RequestLoadCurrentSetting(SM::ServiceManager& sm) {
if (&sm == nullptr) {
return;
}
const auto lbl = sm.GetService<LBL>("lbl");
if (lbl) {
lbl->LoadFromSettings();
}
}
void InstallInterfaces(SM::ServiceManager& sm) {
std::make_shared<LBL>()->InstallAsService(sm);
}

View File

@@ -10,6 +10,9 @@ class ServiceManager;
namespace Service::LBL {
// Requests that the LBL service registered with the given ServiceManager reload brightness values from Settings
void RequestLoadCurrentSetting(SM::ServiceManager& sm);
void InstallInterfaces(SM::ServiceManager& sm);
} // namespace Service::LBL

View File

@@ -22,6 +22,18 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std::
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::IocSetNVMAPfdCommand:
return SetNVMAPfd(input, output);
case IoctlCommand::IocSubmit:
return Submit(input, output);
case IoctlCommand::IocGetSyncpoint:
return GetSyncpoint(input, output);
case IoctlCommand::IocGetWaitbase:
return GetWaitbase(input, output);
case IoctlCommand::IocMapBuffer:
return MapBuffer(input, output);
case IoctlCommand::IocMapBufferEx:
return MapBufferEx(input, output);
case IoctlCommand::IocUnmapBufferEx:
return UnmapBufferEx(input, output);
}
UNIMPLEMENTED_MSG("Unimplemented ioctl");
@@ -30,11 +42,67 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std::
u32 nvhost_nvdec::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSetNvmapFD params{};
std::memcpy(&params, input.data(), input.size());
std::memcpy(&params, input.data(), sizeof(IoctlSetNvmapFD));
LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
nvmap_fd = params.nvmap_fd;
return 0;
}
u32 nvhost_nvdec::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSubmit params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
return 0;
}
u32 nvhost_nvdec::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetSyncpoint params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
params.value = 0; // Seems to be hard coded at 0
std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
return 0;
}
u32 nvhost_nvdec::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetWaitbase params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
params.value = 0; // Seems to be hard coded at 0
std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase));
return 0;
}
u32 nvhost_nvdec::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
params.address_1);
params.address_1 = 0;
params.address_2 = 0;
std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
return 0;
}
u32 nvhost_nvdec::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBufferEx params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBufferEx));
LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
params.address_1);
params.address_1 = 0;
params.address_2 = 0;
std::memcpy(output.data(), &params, sizeof(IoctlMapBufferEx));
return 0;
}
u32 nvhost_nvdec::UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlUnmapBufferEx params{};
std::memcpy(&params, input.data(), sizeof(IoctlUnmapBufferEx));
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
std::memcpy(output.data(), &params, sizeof(IoctlUnmapBufferEx));
return 0;
}
} // namespace Service::Nvidia::Devices

View File

@@ -23,16 +23,66 @@ public:
private:
enum class IoctlCommand : u32_le {
IocSetNVMAPfdCommand = 0x40044801,
IocSubmit = 0xC0400001,
IocGetSyncpoint = 0xC0080002,
IocGetWaitbase = 0xC0080003,
IocMapBuffer = 0xC01C0009,
IocMapBufferEx = 0xC0A40009,
IocUnmapBufferEx = 0xC0A4000A,
};
struct IoctlSetNvmapFD {
u32_le nvmap_fd;
};
static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
static_assert(sizeof(IoctlSetNvmapFD) == 0x4, "IoctlSetNvmapFD is incorrect size");
struct IoctlSubmit {
INSERT_PADDING_BYTES(0x40); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit has incorrect size");
struct IoctlGetSyncpoint {
u32 unknown; // seems to be ignored? Nintendo added this
u32 value;
};
static_assert(sizeof(IoctlGetSyncpoint) == 0x08, "IoctlGetSyncpoint has incorrect size");
struct IoctlGetWaitbase {
u32 unknown; // seems to be ignored? Nintendo added this
u32 value;
};
static_assert(sizeof(IoctlGetWaitbase) == 0x08, "IoctlGetWaitbase has incorrect size");
struct IoctlMapBuffer {
u32 unknown;
u32 address_1;
u32 address_2;
INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size");
struct IoctlMapBufferEx {
u32 unknown;
u32 address_1;
u32 address_2;
INSERT_PADDING_BYTES(0x98); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlMapBufferEx) == 0xA4, "IoctlMapBufferEx has incorrect size");
struct IoctlUnmapBufferEx {
INSERT_PADDING_BYTES(0xA4); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlUnmapBufferEx) == 0xA4, "IoctlUnmapBufferEx has incorrect size");
u32_le nvmap_fd{};
u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
u32 Submit(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
u32 UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
};
} // namespace Service::Nvidia::Devices

View File

@@ -22,6 +22,18 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::IocSetNVMAPfdCommand:
return SetNVMAPfd(input, output);
case IoctlCommand::IocSubmit:
return Submit(input, output);
case IoctlCommand::IocGetSyncpoint:
return GetSyncpoint(input, output);
case IoctlCommand::IocGetWaitbase:
return GetWaitbase(input, output);
case IoctlCommand::IocMapBuffer:
return MapBuffer(input, output);
case IoctlCommand::IocMapBufferEx:
return MapBufferEx(input, output);
case IoctlCommand::IocUnmapBufferEx:
return UnmapBufferEx(input, output);
}
UNIMPLEMENTED_MSG("Unimplemented ioctl");
@@ -30,11 +42,67 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve
u32 nvhost_vic::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSetNvmapFD params{};
std::memcpy(&params, input.data(), input.size());
std::memcpy(&params, input.data(), sizeof(IoctlSetNvmapFD));
LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
nvmap_fd = params.nvmap_fd;
return 0;
}
u32 nvhost_vic::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSubmit params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
return 0;
}
u32 nvhost_vic::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetSyncpoint params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
params.value = 0; // Seems to be hard coded at 0
std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
return 0;
}
u32 nvhost_vic::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetWaitbase params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
params.value = 0; // Seems to be hard coded at 0
std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase));
return 0;
}
u32 nvhost_vic::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
params.address_1);
params.address_1 = 0;
params.address_2 = 0;
std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
return 0;
}
u32 nvhost_vic::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBufferEx params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBufferEx));
LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
params.address_1);
params.address_1 = 0;
params.address_2 = 0;
std::memcpy(output.data(), &params, sizeof(IoctlMapBufferEx));
return 0;
}
u32 nvhost_vic::UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlUnmapBufferEx params{};
std::memcpy(&params, input.data(), sizeof(IoctlUnmapBufferEx));
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
std::memcpy(output.data(), &params, sizeof(IoctlUnmapBufferEx));
return 0;
}
} // namespace Service::Nvidia::Devices

View File

@@ -23,6 +23,12 @@ public:
private:
enum class IoctlCommand : u32_le {
IocSetNVMAPfdCommand = 0x40044801,
IocSubmit = 0xC0400001,
IocGetSyncpoint = 0xC0080002,
IocGetWaitbase = 0xC0080003,
IocMapBuffer = 0xC01C0009,
IocMapBufferEx = 0xC03C0009,
IocUnmapBufferEx = 0xC03C000A,
};
struct IoctlSetNvmapFD {
@@ -30,9 +36,53 @@ private:
};
static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
struct IoctlSubmit {
INSERT_PADDING_BYTES(0x40); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit is incorrect size");
struct IoctlGetSyncpoint {
u32 unknown; // seems to be ignored? Nintendo added this
u32 value;
};
static_assert(sizeof(IoctlGetSyncpoint) == 0x8, "IoctlGetSyncpoint is incorrect size");
struct IoctlGetWaitbase {
u32 unknown; // seems to be ignored? Nintendo added this
u32 value;
};
static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size");
struct IoctlMapBuffer {
u32 unknown;
u32 address_1;
u32 address_2;
INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size");
struct IoctlMapBufferEx {
u32 unknown;
u32 address_1;
u32 address_2;
INSERT_PADDING_BYTES(0x30); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlMapBufferEx) == 0x3C, "IoctlMapBufferEx is incorrect size");
struct IoctlUnmapBufferEx {
INSERT_PADDING_BYTES(0x3C); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlUnmapBufferEx) == 0x3C, "IoctlUnmapBufferEx is incorrect size");
u32_le nvmap_fd{};
u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
u32 Submit(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
u32 UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
};
} // namespace Service::Nvidia::Devices

View File

@@ -6,6 +6,8 @@
#include "core/core.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/service/hid/hid.h"
#include "core/hle/service/lbl/lbl.h"
#include "core/hle/service/sm/sm.h"
#include "core/settings.h"
#include "video_core/renderer_base.h"
@@ -70,6 +72,7 @@ void Apply() {
auto& system_instance = Core::System::GetInstance();
if (system_instance.IsPoweredOn()) {
system_instance.Renderer().RefreshBaseSettings();
Service::LBL::RequestLoadCurrentSetting(system_instance.ServiceManager());
}
Service::HID::ReloadInputDevices();
@@ -94,6 +97,7 @@ void LogSettings() {
LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
LogSetting("Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation);
LogSetting("Renderer_UseResolutionScanner", Settings::values.use_resolution_scanner);
LogSetting("Audio_OutputEngine", Settings::values.sink_id);
LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);

View File

@@ -423,11 +423,14 @@ struct Values {
bool use_accurate_gpu_emulation;
bool use_asynchronous_gpu_emulation;
bool force_30fps_mode;
bool use_resolution_scanner;
float bg_red;
float bg_green;
float bg_blue;
float backlight_brightness = 0.5f;
std::string log_filter;
bool use_dev_keys;

View File

@@ -116,6 +116,8 @@ add_library(video_core STATIC
shader/track.cpp
surface.cpp
surface.h
texture_cache/resolution_scaling/database.cpp
texture_cache/resolution_scaling/database.h
texture_cache/surface_base.cpp
texture_cache/surface_base.h
texture_cache/surface_params.cpp
@@ -165,7 +167,7 @@ endif()
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad)
target_link_libraries(video_core PRIVATE glad json-headers)
if (ENABLE_VULKAN)
target_link_libraries(video_core PRIVATE sirit)
endif()

View File

@@ -40,4 +40,35 @@ void RendererBase::RequestScreenshot(void* data, std::function<void()> callback,
renderer_settings.screenshot_requested = true;
}
f32 RendererBase::GetCurrentResultantBrightness() const {
return renderer_settings.current_brightness / 2.0f;
}
void RendererBase::SetBacklightStatus(bool enabled, u64 fade_transition_time) {
if (fade_transition_time == 0) {
// Needed to ensure the renderer recognizes that a change must occur.
fade_transition_time = 1;
}
if (enabled && renderer_settings.current_brightness == 0) {
renderer_settings.current_brightness = current_brightness_backup;
renderer_settings.backlight_fade_time = fade_transition_time;
} else if (!enabled && renderer_settings.current_brightness != 0) {
current_brightness_backup = renderer_settings.current_brightness;
renderer_settings.current_brightness = 0;
renderer_settings.backlight_fade_time = fade_transition_time;
}
}
bool RendererBase::GetBacklightStatus() const {
return renderer_settings.current_brightness != 0;
}
void RendererBase::SetCurrentBrightness(f32 value) {
if (value != renderer_settings.current_brightness) {
renderer_settings.current_brightness = value * 2.0f;
renderer_settings.backlight_fade_time = 1;
}
}
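A brief sketch (assumed values) of the brightness round-trip implied by the functions above: SetCurrentBrightness stores the value at double scale, and GetCurrentResultantBrightness halves it on the way back out:

#include <cassert>

int main() {
    // Mirror of the mapping above, with the renderer state reduced to a float.
    float current_brightness = 0.0f; // stands in for renderer_settings.current_brightness
    const auto set = [&](float value) { current_brightness = value * 2.0f; };
    const auto resultant = [&] { return current_brightness / 2.0f; };
    set(0.5f);
    assert(resultant() == 0.5f);        // the LBL-side setting reads back unchanged
    assert(current_brightness == 1.0f); // internally stored at double scale
    return 0;
}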
} // namespace VideoCore

View File

@@ -28,6 +28,10 @@ struct RendererSettings {
void* screenshot_bits;
std::function<void()> screenshot_complete_callback;
Layout::FramebufferLayout screenshot_framebuffer_layout;
// Backlight & Brightness
std::atomic<f32> current_brightness{1.f};
std::atomic<u64> backlight_fade_time{0};
};
class RendererBase : NonCopyable {
@@ -86,6 +90,17 @@ public:
void RequestScreenshot(void* data, std::function<void()> callback,
const Layout::FramebufferLayout& layout);
// Gets the current brightness, even if it has been changed from the set value. Most of the time
// this will simply match what was set, but implementations are free to adjust the stored value
// internally.
f32 GetCurrentResultantBrightness() const;
void SetBacklightStatus(bool enabled, u64 fade_transition_time);
bool GetBacklightStatus() const;
void SetCurrentBrightness(f32 value);
protected:
Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
std::unique_ptr<RasterizerInterface> rasterizer;
@@ -97,6 +112,9 @@ protected:
private:
/// Updates the framebuffer layout of the contained render window handle.
void UpdateCurrentFramebufferLayout();
// Brightness value saved before the backlight is switched off, used to restore it when switched back on.
f32 current_brightness_backup;
};
} // namespace VideoCore

View File

@@ -251,7 +251,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
if (!gpu.regs.IsShaderConfigEnabled(index)) {
switch (program) {
case Maxwell::ShaderProgram::Geometry:
shader_program_manager->UseTrivialGeometryShader();
shader_program_manager->BindGeometryShader(nullptr);
break;
case Maxwell::ShaderProgram::Fragment:
shader_program_manager->BindFragmentShader(nullptr);
break;
default:
break;
@@ -261,14 +264,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu, stage);
const auto [buffer, offset] =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
// Bind the emulation info buffer
bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));
Shader shader{shader_cache.GetStageProgram(program)};
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
@@ -282,13 +277,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
switch (program) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
shader_program_manager->UseProgrammableVertexShader(program_handle);
shader_program_manager->BindVertexShader(&program_handle);
break;
case Maxwell::ShaderProgram::Geometry:
shader_program_manager->UseProgrammableGeometryShader(program_handle);
shader_program_manager->BindGeometryShader(&program_handle);
break;
case Maxwell::ShaderProgram::Fragment:
shader_program_manager->UseProgrammableFragmentShader(program_handle);
shader_program_manager->BindFragmentShader(&program_handle);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
@@ -380,6 +375,7 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
shader_cache.LoadDiskCache(stop_loading, callback);
texture_cache.LoadResources();
}
void RasterizerOpenGL::ConfigureFramebuffers() {
@@ -426,7 +422,6 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
texture_cache.GuardRenderTargets(false);
state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey);
SyncViewport(state);
}
void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
@@ -552,9 +547,16 @@ void RasterizerOpenGL::Clear() {
ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
SyncViewport(clear_state);
bool res_scaling;
if (use_color) {
res_scaling = texture_cache.IsResolutionScalingEnabledRT(regs.clear_buffers.RT);
} else {
res_scaling = texture_cache.IsResolutionScalingEnabledDB();
}
SyncViewport(clear_state, res_scaling);
if (regs.clear_flags.scissor) {
SyncScissorTest(clear_state);
SyncScissorTest(clear_state, res_scaling);
}
if (regs.clear_flags.viewport) {
@@ -589,7 +591,6 @@ void RasterizerOpenGL::DrawPrelude() {
SyncLogicOpState();
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest(state);
SyncTransformFeedback();
SyncPointState();
SyncPolygonOffset();
@@ -605,11 +606,6 @@ void RasterizerOpenGL::DrawPrelude() {
buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
}
// Uniform space for the 5 shader stages
buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) +
(sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) *
Maxwell::MaxShaderStage;
// Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers *
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
@@ -651,6 +647,11 @@ void RasterizerOpenGL::DrawPrelude() {
gpu.dirty.ResetVertexArrays();
}
const bool res_scaling = texture_cache.IsResolutionScalingEnabled();
SyncViewport(state, res_scaling);
SyncScissorTest(state, res_scaling);
shader_program_manager->SetConstants(gpu, res_scaling);
shader_program_manager->ApplyTo(state);
state.Apply();
@@ -773,7 +774,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
SetupComputeImages(kernel);
const auto [program, next_bindings] = kernel->GetProgramHandle(variant);
state.draw.shader_program = program;
state.draw.shader_program = program.handle;
state.draw.program_pipeline = 0;
const std::size_t buffer_size =
@@ -1072,20 +1073,21 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
state.images[binding] = view->GetTexture();
}
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state, bool rescaling) {
const auto& regs = system.GPU().Maxwell3D().regs;
const bool geometry_shaders_enabled =
regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
const std::size_t viewport_count =
geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1;
const float factor = rescaling ? Settings::values.resolution_factor : 1.0f;
for (std::size_t i = 0; i < viewport_count; i++) {
auto& viewport = current_state.viewports[i];
const auto& src = regs.viewports[i];
const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
viewport.x = viewport_rect.left;
viewport.y = viewport_rect.bottom;
viewport.width = viewport_rect.GetWidth();
viewport.height = viewport_rect.GetHeight();
viewport.x = static_cast<GLint>(viewport_rect.left * factor);
viewport.y = static_cast<GLint>(viewport_rect.bottom * factor);
viewport.width = static_cast<GLint>(viewport_rect.GetWidth() * factor);
viewport.height = static_cast<GLint>(viewport_rect.GetHeight() * factor);
viewport.depth_range_far = src.depth_range_far;
viewport.depth_range_near = src.depth_range_near;
}
@@ -1296,12 +1298,13 @@ void RasterizerOpenGL::SyncLogicOpState() {
state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation);
}
void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state, bool rescaling) {
const auto& regs = system.GPU().Maxwell3D().regs;
const bool geometry_shaders_enabled =
regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
const std::size_t viewport_count =
geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1;
const float factor = rescaling ? Settings::values.resolution_factor : 1.0f;
for (std::size_t i = 0; i < viewport_count; i++) {
const auto& src = regs.scissor_test[i];
auto& dst = current_state.viewports[i].scissor;
@@ -1311,10 +1314,10 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
}
const u32 width = src.max_x - src.min_x;
const u32 height = src.max_y - src.min_y;
dst.x = src.min_x;
dst.y = src.min_y;
dst.width = width;
dst.height = height;
dst.x = static_cast<u32>(src.min_x * factor);
dst.y = static_cast<u32>(src.min_y * factor);
dst.width = static_cast<u32>(width * factor);
dst.height = static_cast<u32>(height * factor);
}
}
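A worked sketch (rect and factor values are assumed) of the rescaling arithmetic in SyncViewport/SyncScissorTest above: with rescaling enabled, guest coordinates are simply multiplied by Settings::values.resolution_factor:

#include <cassert>
#include <cstdint>

int main() {
    const float factor = 2.0f; // assumed resolution_factor when rescaling is on
    const std::uint32_t min_x = 10, min_y = 20, max_x = 1290, max_y = 740;
    const std::uint32_t width = max_x - min_x;   // 1280
    const std::uint32_t height = max_y - min_y;  // 720
    // Same arithmetic as the scissor sync above: a 1280x720 guest rect
    // becomes 2560x1440 in host space, with the origin scaled to match.
    assert(static_cast<std::uint32_t>(width * factor) == 2560);
    assert(static_cast<std::uint32_t>(height * factor) == 1440);
    assert(static_cast<std::uint32_t>(min_x * factor) == 20);
    assert(static_cast<std::uint32_t>(min_y * factor) == 40);
    return 0;
}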

View File

@@ -128,7 +128,7 @@ private:
const GLShader::ImageEntry& entry);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state);
void SyncViewport(OpenGLState& current_state, bool rescaling);
/// Syncs the clip enabled status to match the guest state
void SyncClipEnabled(
@@ -162,7 +162,7 @@ private:
void SyncMultiSampleState();
/// Syncs the scissor test state to match the guest state
void SyncScissorTest(OpenGLState& current_state);
void SyncScissorTest(OpenGLState& current_state, bool rescaling);
/// Syncs the transform feedback state to match the guest state
void SyncTransformFeedback();

View File

@@ -23,9 +23,6 @@ namespace OpenGL {
using VideoCommon::Shader::ProgramCode;
// One UBO is always reserved for emulation values on staged shaders
constexpr u32 STAGE_RESERVED_UBOS = 1;
struct UnspecializedShader {
std::string code;
GLShader::ShaderEntries entries;
@@ -224,10 +221,6 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
}
source += '\n';
if (program_type != ProgramType::Compute) {
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
}
for (const auto& cbuf : entries.const_buffers) {
source +=
fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
@@ -273,7 +266,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
OGLShader shader;
shader.Create(source.c_str(), GetShaderType(program_type));
auto program = std::make_shared<OGLProgram>();
auto program = std::make_shared<GLShader::StageProgram>();
program->Create(true, hint_retrievable, shader.handle);
return program;
}
@@ -348,28 +341,26 @@ Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params,
new CachedShader(params, ProgramType::Compute, std::move(result)));
}
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
std::tuple<GLShader::StageProgram&, BaseBindings> CachedShader::GetProgramHandle(
const ProgramVariant& variant) {
const auto [entry, is_cache_miss] = programs.try_emplace(variant);
auto& program = entry->second;
auto& stage_program = entry->second;
if (is_cache_miss) {
program = TryLoadProgram(variant);
if (!program) {
program = SpecializeShader(code, entries, program_type, variant);
stage_program = TryLoadProgram(variant);
if (!stage_program) {
stage_program = SpecializeShader(code, entries, program_type, variant);
disk_cache.SaveUsage(GetUsage(variant));
}
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
LabelGLObject(GL_PROGRAM, stage_program->handle, cpu_addr);
}
auto base_bindings = variant.base_bindings;
auto base_bindings{variant.base_bindings};
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size());
if (program_type != ProgramType::Compute) {
base_bindings.cbuf += STAGE_RESERVED_UBOS;
}
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
return {program->handle, base_bindings};
return {*stage_program, base_bindings};
}
CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
@@ -516,7 +507,7 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
return {};
}
CachedProgram shader = std::make_shared<OGLProgram>();
CachedProgram shader = std::make_shared<GLShader::StageProgram>();
shader->handle = glCreateProgram();
glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(),

View File

@@ -20,6 +20,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
namespace Core {
class System;
@@ -37,7 +38,7 @@ class RasterizerOpenGL;
struct UnspecializedShader;
using Shader = std::shared_ptr<CachedShader>;
using CachedProgram = std::shared_ptr<OGLProgram>;
using CachedProgram = std::shared_ptr<GLShader::StageProgram>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
@@ -80,7 +81,8 @@ public:
}
/// Gets the GL program handle for the shader
std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
std::tuple<GLShader::StageProgram&, BaseBindings> GetProgramHandle(
const ProgramVariant& variant);
private:
explicit CachedShader(const ShaderParameters& params, ProgramType program_type,

View File

@@ -933,9 +933,18 @@ private:
return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
GetSwizzle(element)),
Type::Float};
case ProgramType::Fragment:
return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)),
Type::Float};
case ProgramType::Fragment: {
switch (element) {
case 0:
return {"(gl_FragCoord.x / utof(config_pack[3]))", Type::Float};
case 1:
return {"(gl_FragCoord.y / utof(config_pack[3]))", Type::Float};
case 2:
return {"gl_FragCoord.z", Type::Float};
case 3:
return {"1.0f", Type::Float};
}
}
default:
UNREACHABLE();
}
@@ -2267,7 +2276,10 @@ std::string GetCommonDeclarations() {
" bvec2 is_nan2 = isnan(pair2);\n"
" return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
"is_nan2.y);\n"
"}}\n\n");
"}}\n\n"
"layout(location = 0) uniform uvec4 config_pack; // instance_id, flip_stage, y_direction, "
"padding\n"
"layout(location = 1) uniform vec2 viewport_flip;\n\n");
}
ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,

View File

@@ -23,14 +23,6 @@ ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setu
std::string out = "// Shader Unique Id: VS" + id + "\n\n";
out += GetCommonDeclarations();
out += R"(
layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
};
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
ProgramResult program = Decompile(device, program_ir, stage, "vertex");
@@ -72,14 +64,6 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se
std::string out = "// Shader Unique Id: GS" + id + "\n\n";
out += GetCommonDeclarations();
out += R"(
layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
};
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
out += program.first;
@@ -87,7 +71,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
out += R"(
void main() {
execute_geometry();
};)";
})";
return {std::move(out), std::move(program.second)};
}
@@ -108,11 +92,6 @@ layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
};
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");

View File

@@ -2,13 +2,34 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include "common/common_types.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
namespace OpenGL::GLShader {
using Tegra::Engines::Maxwell3D;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
StageProgram::StageProgram() = default;
StageProgram::~StageProgram() = default;
void StageProgram::UpdateConstants() {
enum ProgramLocations : u32 {
CONFIG_PACK = 0,
VIEWPORT_SCALE = 1,
};
if (state.config_pack != old_state.config_pack) {
glProgramUniform4uiv(handle, CONFIG_PACK, 1, state.config_pack.data());
old_state.config_pack = state.config_pack;
}
if (state.viewport_scale != old_state.viewport_scale) {
glProgramUniform2fv(handle, VIEWPORT_SCALE, 1, state.viewport_scale.data());
old_state.viewport_scale = state.viewport_scale;
}
}
ProgramManager::ProgramManager() {
pipeline.Create();
@@ -16,12 +37,59 @@ ProgramManager::ProgramManager() {
ProgramManager::~ProgramManager() = default;
void ProgramManager::SetConstants(Tegra::Engines::Maxwell3D& maxwell_3d, bool rescaling) {
const auto& regs = maxwell_3d.regs;
const auto& state = maxwell_3d.state;
// TODO(bunnei): Support more than one viewport
const GLfloat flip_x = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
const GLfloat flip_y = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
const GLuint instance_id = state.current_instance;
// Assign in which stage the position has to be flipped (the last stage before the fragment
// shader).
const GLuint flip_stage = [&]() {
constexpr u32 geometry_index = static_cast<u32>(Maxwell::ShaderProgram::Geometry);
if (regs.shader_config[geometry_index].enable) {
return geometry_index;
} else {
return static_cast<u32>(Maxwell::ShaderProgram::VertexB);
}
}();
// Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
const GLfloat y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
const GLfloat rescale_factor = rescaling ? Settings::values.resolution_factor : 1.0f;
for (const auto stage :
{current_state.vertex, current_state.geometry, current_state.fragment}) {
if (!stage) {
continue;
}
stage->SetInstanceID(instance_id);
stage->SetFlipStage(flip_stage);
stage->SetYDirection(y_direction);
stage->SetViewportScale(flip_x, flip_y);
stage->SetRescalingFactor(rescale_factor);
stage->UpdateConstants();
}
}
void ProgramManager::ApplyTo(OpenGLState& state) {
UpdatePipeline();
state.draw.shader_program = 0;
state.draw.program_pipeline = pipeline.handle;
}
GLuint GetHandle(StageProgram* program) {
if (!program) {
return 0;
}
return program->handle;
}
void ProgramManager::UpdatePipeline() {
    // Avoid updating the pipeline when the values have not changed
if (old_state == current_state) {
@@ -33,34 +101,11 @@ void ProgramManager::UpdatePipeline() {
GL_FRAGMENT_SHADER_BIT};
glUseProgramStages(pipeline.handle, all_used_stages, 0);
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, GetHandle(current_state.vertex));
glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, GetHandle(current_state.geometry));
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, GetHandle(current_state.fragment));
old_state = current_state;
}
void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
const auto& regs = maxwell.regs;
const auto& state = maxwell.state;
// TODO(bunnei): Support more than one viewport
viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
instance_id = state.current_instance;
// Assign in which stage the position has to be flipped
// (the last stage before the fragment shader).
constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
if (maxwell.regs.shader_config[geometry_index].enable) {
flip_stage = geometry_index;
} else {
flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
}
// Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f;
}
} // namespace OpenGL::GLShader
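
Taken together, the hunks above drop the per-stage emulation UBO in favour of per-program uniforms that are re-uploaded only when their values change. A minimal usage sketch of the new interface follows; vertex_program, fragment_program, maxwell_3d and gl_state are placeholders for objects the rasterizer already owns, so this illustrates the call sequence rather than reproducing patch code.

GLShader::ProgramManager program_manager;
program_manager.BindVertexShader(&vertex_program);
program_manager.BindGeometryShader(nullptr);       // no geometry stage for this draw
program_manager.BindFragmentShader(&fragment_program);
// Packs instance_id, flip_stage, y_direction and the rescaling factor into
// config_pack (location 0) and the viewport flip into location 1; values are
// uploaded with glProgramUniform* only when they differ from the cached state.
program_manager.SetConstants(maxwell_3d, /*rescaling=*/true);
program_manager.ApplyTo(gl_state);                 // selects the separable pipeline
gl_state.Apply();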

View File

@@ -4,71 +4,103 @@
#pragma once
#include <array>
#include <cstddef>
#include <tuple>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace Tegra::Engines {
class Maxwell3D;
}
namespace OpenGL::GLShader {
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
/// Not following that rule will cause problems on some AMD drivers.
struct MaxwellUniformData {
void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage);
class StageProgram final : public OGLProgram {
public:
explicit StageProgram();
~StageProgram();
alignas(16) GLvec4 viewport_flip;
struct alignas(16) {
GLuint instance_id;
GLuint flip_stage;
GLfloat y_direction;
void UpdateConstants();
void SetInstanceID(GLuint instance_id) {
state.instance_id = instance_id;
}
void SetFlipStage(GLuint flip_stage) {
state.flip_stage = flip_stage;
}
void SetYDirection(GLfloat y_direction) {
state.y_direction = y_direction;
}
void SetRescalingFactor(GLfloat rescaling_factor) {
state.rescaling_factor = rescaling_factor;
}
void SetViewportScale(GLfloat x, GLfloat y) {
state.viewport_scale = {x, y};
}
private:
struct State {
union {
std::array<GLuint, 4> config_pack{};
struct {
GLuint instance_id;
GLuint flip_stage;
GLfloat y_direction;
GLfloat rescaling_factor;
};
};
std::array<GLfloat, 2> viewport_scale{};
};
};
static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
class ProgramManager {
State state;
State old_state;
};
class ProgramManager final {
public:
explicit ProgramManager();
~ProgramManager();
void SetConstants(Tegra::Engines::Maxwell3D& maxwell_3d, bool rescaling);
void ApplyTo(OpenGLState& state);
void UseProgrammableVertexShader(GLuint program) {
current_state.vertex_shader = program;
void BindVertexShader(StageProgram* program) {
current_state.vertex = program;
}
void UseProgrammableGeometryShader(GLuint program) {
current_state.geometry_shader = program;
void BindGeometryShader(StageProgram* program) {
current_state.geometry = program;
}
void UseProgrammableFragmentShader(GLuint program) {
current_state.fragment_shader = program;
}
void UseTrivialGeometryShader() {
current_state.geometry_shader = 0;
void BindFragmentShader(StageProgram* program) {
current_state.fragment = program;
}
private:
struct PipelineState {
bool operator==(const PipelineState& rhs) const {
return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader &&
geometry_shader == rhs.geometry_shader;
return vertex == rhs.vertex && fragment == rhs.fragment && geometry == rhs.geometry;
}
bool operator!=(const PipelineState& rhs) const {
return !operator==(rhs);
}
GLuint vertex_shader{};
GLuint fragment_shader{};
GLuint geometry_shader{};
StageProgram* vertex{};
StageProgram* fragment{};
StageProgram* geometry{};
};
void UpdatePipeline();

View File

@@ -199,7 +199,7 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
}
OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format,
OGLBuffer& texture_buffer) {
OGLBuffer& texture_buffer, u32 resolution_factor) {
OGLTexture texture;
texture.Create(target);
@@ -214,6 +214,9 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
glTextureBuffer(texture.handle, internal_format, texture_buffer.handle);
break;
case SurfaceTarget::Texture2D:
glTextureStorage2D(texture.handle, params.emulated_levels, internal_format,
params.width * resolution_factor, params.height * resolution_factor);
break;
case SurfaceTarget::TextureCubemap:
glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
params.height);
@@ -242,8 +245,13 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param
format = tuple.format;
type = tuple.type;
is_compressed = tuple.compressed;
}
void CachedSurface::Init() {
target = GetTextureTarget(params.target);
texture = CreateTexture(params, target, internal_format, texture_buffer);
const u32 resolution_factor =
IsRescaled() ? static_cast<u32>(Settings::values.resolution_factor) : 1;
texture = CreateTexture(params, target, internal_format, texture_buffer, resolution_factor);
DecorateSurfaceName();
main_view = CreateViewInner(
ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
@@ -461,7 +469,10 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
TextureCacheOpenGL::~TextureCacheOpenGL() = default;
Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
return std::make_shared<CachedSurface>(gpu_addr, params);
Surface new_surface = std::make_shared<CachedSurface>(gpu_addr, params);
SignalCreatedSurface(new_surface);
new_surface->Init();
return new_surface;
}
void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
@@ -472,15 +483,21 @@ void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
// A fallback is needed
return;
}
const bool src_rescaled = src_surface->IsRescaled();
const bool dst_rescaled = dst_surface->IsRescaled();
if (src_rescaled != dst_rescaled) {
        LOG_CRITICAL(HW_GPU, "Rescaling Database is incorrectly set! Rescan the database!");
}
const u32 factor = src_rescaled ? static_cast<u32>(Settings::values.resolution_factor) : 1U;
const auto src_handle = src_surface->GetTexture();
const auto src_target = src_surface->GetTarget();
const auto dst_handle = dst_surface->GetTexture();
const auto dst_target = dst_surface->GetTarget();
glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x,
copy_params.source_y, copy_params.source_z, dst_handle, dst_target,
copy_params.dest_level, copy_params.dest_x, copy_params.dest_y,
copy_params.dest_z, copy_params.width, copy_params.height,
copy_params.depth);
glCopyImageSubData(src_handle, src_target, copy_params.source_level,
copy_params.source_x * factor, copy_params.source_y * factor,
copy_params.source_z, dst_handle, dst_target, copy_params.dest_level,
copy_params.dest_x * factor, copy_params.dest_y * factor, copy_params.dest_z,
copy_params.width * factor, copy_params.height * factor, copy_params.depth);
}
void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
@@ -539,8 +556,14 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
const bool src_rescaled = src_view->GetParent().IsRescaled();
const bool dst_rescaled = dst_view->GetParent().IsRescaled();
const u32 factor1 = src_rescaled ? static_cast<u32>(Settings::values.resolution_factor) : 1U;
const u32 factor2 = dst_rescaled ? static_cast<u32>(Settings::values.resolution_factor) : 1U;
glBlitFramebuffer(src_rect.left * factor1, src_rect.top * factor1, src_rect.right * factor1,
src_rect.bottom * factor1, dst_rect.left * factor2, dst_rect.top * factor2,
dst_rect.right * factor2, dst_rect.bottom * factor2, buffers,
is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
}
@@ -553,8 +576,15 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
const std::size_t source_size = src_surface->GetHostSizeInBytes();
const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
const bool src_rescaled = src_surface->IsRescaled();
const bool dst_rescaled = dst_surface->IsRescaled();
if (src_rescaled != dst_rescaled) {
        LOG_CRITICAL(HW_GPU, "Rescaling Database is incorrectly set! Rescan the database!");
}
const u32 factor = src_rescaled ? static_cast<u32>(Settings::values.resolution_factor) : 1U;
const std::size_t source_size = src_surface->GetHostSizeInBytes() * factor * factor;
const std::size_t dest_size = dst_surface->GetHostSizeInBytes() * factor * factor;
const std::size_t buffer_size = std::max(source_size, dest_size);
@@ -573,8 +603,8 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
const GLsizei width = static_cast<GLsizei>(dst_params.width);
const GLsizei height = static_cast<GLsizei>(dst_params.height);
const GLsizei width = static_cast<GLsizei>(dst_params.width * factor);
const GLsizei height = static_cast<GLsizei>(dst_params.height * factor);
const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
if (dest_format.compressed) {
LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");

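The pattern shared by ImageCopy, ImageBlit and BufferCopy above is that once a surface is marked as rescaled, every coordinate and extent handed to GL is multiplied by the resolution factor. As a worked example for ImageCopy (a sketch, not patch code: src_handle and dst_handle are placeholder texture names and a factor of 2 is assumed), a 1280x720 level-0 copy at the origin becomes:

const u32 factor = 2; // Settings::values.resolution_factor when both surfaces are rescaled
glCopyImageSubData(src_handle, GL_TEXTURE_2D, 0, 0 * factor, 0 * factor, 0,
                   dst_handle, GL_TEXTURE_2D, 0, 0 * factor, 0 * factor, 0,
                   1280 * factor, 720 * factor, 1); // copies 2560x1440 texels
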
View File

@@ -39,6 +39,8 @@ public:
explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params);
~CachedSurface();
void Init();
void UploadTexture(const std::vector<u8>& staging_buffer) override;
void DownloadTexture(std::vector<u8>& staging_buffer) override;
@@ -96,6 +98,10 @@ public:
return texture_view.handle;
}
const CachedSurface& GetParent() const {
return surface;
}
const SurfaceParams& GetSurfaceParams() const {
return surface.GetSurfaceParams();
}

View File

@@ -54,11 +54,13 @@ in vec2 frag_tex_coord;
out vec4 color;
uniform sampler2D color_texture;
uniform vec4 backlight;
void main() {
// Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to
// support more framebuffer pixel formats.
color = texture(color_texture, frag_tex_coord);
// Also multiply the color by the backlight multiplier supplied.
color = texture(color_texture, frag_tex_coord) * backlight;
}
)";
@@ -123,8 +125,13 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
// Load the framebuffer from memory, draw it to the screen, and swap buffers
LoadFBToScreenInfo(*framebuffer);
if (renderer_settings.screenshot_requested)
if (renderer_settings.screenshot_requested) {
CaptureScreenshot();
}
if (renderer_settings.backlight_fade_time > 0) {
UpdateBacklight();
}
DrawScreen(render_window.GetFramebufferLayout());
@@ -210,9 +217,13 @@ void RendererOpenGL::InitOpenGLObjects() {
state.Apply();
uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
uniform_backlight = glGetUniformLocation(shader.handle, "backlight");
attrib_position = glGetAttribLocation(shader.handle, "vert_position");
attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");
// Initialize backlight
glUniform4f(uniform_backlight, 1.f, 1.f, 1.f, 1.f);
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -421,6 +432,29 @@ void RendererOpenGL::CaptureScreenshot() {
renderer_settings.screenshot_requested = false;
}
void RendererOpenGL::UpdateBacklight() {
    constexpr u64 PER_FRAME_FADE_TIME = 1000000000 / 60; // nanoseconds per frame at 60 FPS
const auto fade_time = renderer_settings.backlight_fade_time.load(std::memory_order_relaxed);
auto value = renderer_settings.current_brightness.load(std::memory_order_relaxed);
if (fade_time <= PER_FRAME_FADE_TIME) {
glUniform4f(uniform_backlight, value, value, value, value);
renderer_settings.backlight_fade_time = 0;
fade_time_max = 0;
} else {
if (fade_time_max == 0) {
fade_time_max = fade_time;
value_max = value;
}
value += (value_max - value) * PER_FRAME_FADE_TIME / fade_time_max;
glUniform4f(uniform_backlight, value, value, value, value);
renderer_settings.backlight_fade_time -= PER_FRAME_FADE_TIME;
renderer_settings.current_brightness = value;
}
}
static const char* GetSource(GLenum source) {
#define RET(s) \
case GL_DEBUG_SOURCE_##s: \

View File

@@ -70,6 +70,7 @@ private:
void UpdateFramerate();
void CaptureScreenshot();
void UpdateBacklight();
// Loads framebuffer from emulated memory into the display information structure
void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
@@ -97,6 +98,7 @@ private:
// Shader uniform location indices
GLuint uniform_modelview_matrix;
GLuint uniform_color_texture;
GLuint uniform_backlight;
// Shader attribute input indices
GLuint attrib_position;
@@ -105,6 +107,10 @@ private:
/// Used for transforming the framebuffer orientation
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
Common::Rectangle<int> framebuffer_crop_rect;
// Used for backlight transitions
u64 fade_time_max = 0;
f32 value_max = 0;
};
} // namespace OpenGL

View File

@@ -0,0 +1,121 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <fstream>
#include <iomanip>
#include <fmt/format.h>
#include <json.hpp>
#include "common/assert.h"
#include "common/common_paths.h"
#include "common/common_types.h"
#include "common/file_util.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
#include "video_core/texture_cache/resolution_scaling/database.h"
namespace VideoCommon::Resolution {
using namespace nlohmann;
std::string GetBaseDir() {
return FileUtil::GetUserPath(FileUtil::UserPath::RescalingDir);
}
ScalingDatabase::ScalingDatabase(Core::System& system) : system{system} {}
ScalingDatabase::~ScalingDatabase() {
SaveDatabase();
}
void ScalingDatabase::Init() {
title_id = system.CurrentProcess()->GetTitleID();
LoadDatabase();
initialized = true;
}
void ScalingDatabase::LoadDatabase() {
const std::string path = GetProfilePath();
const bool exists = FileUtil::Exists(path);
if (!exists) {
return;
}
std::ifstream file;
OpenFStream(file, path, std::ios_base::in);
json in;
file >> in;
u32 version = in["version"].get<u32>();
if (version != DBVersion) {
return;
}
for (const auto& entry : in["entries"]) {
ResolutionKey key{};
key.format = static_cast<PixelFormat>(entry["format"].get<u32>());
key.width = entry["width"].get<u32>();
key.height = entry["height"].get<u32>();
database.insert(key);
}
for (const auto& entry : in["blacklist"]) {
ResolutionKey key{};
key.format = static_cast<PixelFormat>(entry["format"].get<u32>());
key.width = entry["width"].get<u32>();
key.height = entry["height"].get<u32>();
blacklist.insert(key);
}
}
void ScalingDatabase::SaveDatabase() {
const std::string dir = GetBaseDir();
if (!FileUtil::CreateDir(dir)) {
LOG_ERROR(HW_GPU, "Failed to create directory={}", dir);
return;
}
json out;
out.emplace("version", DBVersion);
auto entries = json::array();
for (const auto& key : database) {
entries.push_back({
{"format", static_cast<u32>(key.format)},
{"width", key.width},
{"height", key.height},
});
}
out.emplace("entries", std::move(entries));
auto blacklist_entries = json::array();
for (const auto& key : blacklist) {
blacklist_entries.push_back({
{"format", static_cast<u32>(key.format)},
{"width", key.width},
{"height", key.height},
});
}
out.emplace("blacklist", std::move(blacklist_entries));
const std::string path = GetProfilePath();
std::ofstream file;
OpenFStream(file, path, std::ios_base::out);
file << std::setw(4) << out << std::endl;
}
void ScalingDatabase::Register(PixelFormat format, u32 width, u32 height) {
const ResolutionKey key{format, width, height};
if (blacklist.count(key) == 0) {
database.insert(key);
}
}
void ScalingDatabase::Unregister(PixelFormat format, u32 width, u32 height) {
const ResolutionKey key{format, width, height};
database.erase(key);
blacklist.insert(key);
}
std::string ScalingDatabase::GetTitleID() const {
return fmt::format("{:016X}", title_id);
}
std::string ScalingDatabase::GetProfilePath() const {
return FileUtil::SanitizePath(GetBaseDir() + DIR_SEP_CHR + GetTitleID() + ".json");
}
} // namespace VideoCommon::Resolution
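
For reference, a sketch of the document that SaveDatabase() above writes to <user dir>/rescaling/<title id>.json, built with the same nlohmann::json calls. The width/height/format values are made up for illustration; "format" stores the PixelFormat enum as a u32.

#include <json.hpp>

nlohmann::json MakeExampleProfile() {
    nlohmann::json out;
    out.emplace("version", 1);
    auto entries = nlohmann::json::array();
    entries.push_back({{"format", 0}, {"width", 1280}, {"height", 720}});
    out.emplace("entries", entries);
    out.emplace("blacklist", nlohmann::json::array()); // same layout as "entries"
    return out;
}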

View File

@@ -0,0 +1,88 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <unordered_set>
#include "video_core/surface.h"
namespace Core {
class System;
}
namespace VideoCommon::Resolution {
using VideoCore::Surface::PixelFormat;
struct ResolutionKey {
PixelFormat format;
u32 width;
u32 height;
std::size_t Hash() const {
const std::size_t comp1 = static_cast<std::size_t>(format) << 44;
const std::size_t comp2 = static_cast<std::size_t>(height) << 24;
const std::size_t comp3 = static_cast<std::size_t>(width);
return comp1 | comp2 | comp3;
}
bool operator==(const ResolutionKey& ks) const {
return std::tie(format, width, height) == std::tie(ks.format, ks.width, ks.height);
}
bool operator!=(const ResolutionKey& ks) const {
return !(*this == ks);
}
};
} // namespace VideoCommon::Resolution
namespace std {
template <>
struct hash<VideoCommon::Resolution::ResolutionKey> {
std::size_t operator()(const VideoCommon::Resolution::ResolutionKey& k) const {
return k.Hash();
}
};
} // namespace std
namespace VideoCommon::Resolution {
class ScalingDatabase {
public:
explicit ScalingDatabase(Core::System& system);
~ScalingDatabase();
void SaveDatabase();
void LoadDatabase();
void Init();
bool IsInDatabase(const PixelFormat format, const u32 width, const u32 height) const {
const ResolutionKey key{format, width, height};
return database.count(key) > 0;
}
bool IsBlacklisted(const PixelFormat format, const u32 width, const u32 height) const {
const ResolutionKey key{format, width, height};
return blacklist.count(key) > 0;
}
void Register(const PixelFormat format, const u32 width, const u32 height);
void Unregister(const PixelFormat format, const u32 width, const u32 height);
std::string GetTitleID() const;
std::string GetProfilePath() const;
private:
std::unordered_set<ResolutionKey> database{};
std::unordered_set<ResolutionKey> blacklist{};
bool initialized{};
u64 title_id{};
Core::System& system;
static constexpr u32 DBVersion = 1;
};
} // namespace VideoCommon::Resolution
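
A minimal usage sketch of the interface above; it assumes an initialized Core::System (so Init() can query the current title ID) and uses PixelFormat::ABGR8U purely as an example key.

VideoCommon::Resolution::ScalingDatabase db{system};
db.Init();                                             // loads <title_id>.json if it exists
db.Register(PixelFormat::ABGR8U, 1280, 720);           // candidate resolution for rescaling
if (db.IsInDatabase(PixelFormat::ABGR8U, 1280, 720) &&
    !db.IsBlacklisted(PixelFormat::ABGR8U, 1280, 720)) {
    // Surfaces with this key may be created at the scaled resolution.
}
db.Unregister(PixelFormat::ABGR8U, 1280, 720);         // moves the key to the blacklist
db.SaveDatabase();                                     // also runs from the destructor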

View File

@@ -205,6 +205,10 @@ public:
index = index_;
}
    void MarkAsRescaled(bool is_rescaled_) {
        is_rescaled = is_rescaled_;
    }
void MarkAsPicked(bool is_picked_) {
is_picked = is_picked_;
}
@@ -226,6 +230,10 @@ public:
return index;
}
bool IsRescaled() const {
return is_rescaled;
}
bool IsRegistered() const {
return is_registered;
}
@@ -318,6 +326,7 @@ private:
bool is_target{};
bool is_registered{};
bool is_picked{};
bool is_rescaled{};
u32 index{NO_RT};
u64 modification_tick{};
};

View File

@@ -12,6 +12,7 @@
#include <tuple>
#include <unordered_map>
#include <vector>
#include <fmt/format.h>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>
@@ -29,6 +30,7 @@
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/copy_params.h"
#include "video_core/texture_cache/resolution_scaling/database.h"
#include "video_core/texture_cache/surface_base.h"
#include "video_core/texture_cache/surface_params.h"
#include "video_core/texture_cache/surface_view.h"
@@ -53,15 +55,27 @@ class TextureCache {
using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
using IntervalType = typename IntervalMap::interval_type;
private:
enum class UnregisterReason : u32 {
Invalidated,
Recycled,
Rebuilt,
Restructured,
};
public:
void InvalidateRegion(CacheAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
for (const auto& surface : GetSurfacesInRegion(addr, size)) {
Unregister(surface);
Unregister(surface, UnregisterReason::Invalidated);
}
}
void LoadResources() {
scaling_database.Init();
}
/***
     * `Guard` guarantees that render targets don't unregister themselves if they
* collide. Protection is currently only done on 3D slices.
@@ -151,8 +165,12 @@ public:
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
depth_buffer.target = surface_view.first;
depth_buffer.view = surface_view.second;
if (depth_buffer.target)
if (depth_buffer.target) {
depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
if (IsResolutionScannerEnabled()) {
MarkScanner(depth_buffer.target);
}
}
return surface_view.second;
}
@@ -185,8 +203,12 @@ public:
render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
render_targets[index].target = surface_view.first;
render_targets[index].view = surface_view.second;
if (render_targets[index].target)
if (render_targets[index].target) {
render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
if (IsResolutionScannerEnabled()) {
MarkScanner(render_targets[index].target);
}
}
return surface_view.second;
}
@@ -226,6 +248,12 @@ public:
std::lock_guard lock{mutex};
std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config);
std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config);
if (IsResolutionScannerEnabled()) {
bool is_candidate = IsInRSDatabase(src_surface.first);
if (is_candidate) {
MarkScanner(dst_surface.first);
}
}
ImageBlit(src_surface.second, dst_surface.second, copy_config);
dst_surface.first->MarkAsModified(true, Tick());
}
@@ -249,13 +277,46 @@ public:
return ++ticks;
}
bool IsResolutionScalingEnabled() {
if (IsResolutionScannerEnabled()) {
return CheckBlackListMatch();
}
if (!EnabledRescaling()) {
return false;
}
return CheckResolutionScalingEnabled();
}
bool IsResolutionScalingEnabledRT(const std::size_t index) {
if (!EnabledRescaling()) {
return false;
}
if (render_targets[index].target) {
return render_targets[index].target->IsRescaled();
}
return false;
}
bool IsResolutionScalingEnabledDB() {
if (!EnabledRescaling()) {
return false;
}
if (depth_buffer.target) {
return depth_buffer.target->IsRescaled();
}
return false;
}
protected:
TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
: system{system}, rasterizer{rasterizer} {
: system{system}, rasterizer{rasterizer}, scaling_database{system} {
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
SetEmptyColorBuffer(i);
}
enable_resolution_scaling =
Settings::values.resolution_factor != 1.0f && !Settings::values.use_resolution_scanner;
SetEmptyDepthBuffer();
staging_cache.SetSize(2);
@@ -316,13 +377,19 @@ protected:
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
}
void Unregister(TSurface surface) {
void Unregister(TSurface surface, UnregisterReason reason) {
if (guard_render_targets && surface->IsProtected()) {
return;
}
if (!guard_render_targets && surface->IsRenderTarget()) {
ManageRenderTargetUnregister(surface);
}
if (IsResolutionScannerEnabled()) {
if (reason == UnregisterReason::Restructured) {
UnmarkScanner(surface);
}
}
const std::size_t size = surface->GetSizeInBytes();
const VAddr cpu_addr = surface->GetCpuAddr();
rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
@@ -348,6 +415,13 @@ protected:
return GetSurface(gpu_addr, params, true, false);
}
    // Must be called by the child class from its CreateSurface implementation
void SignalCreatedSurface(TSurface& new_surface) {
if (EnabledRescaling() && IsInRSDatabase(new_surface)) {
new_surface->MarkAsRescaled(true);
}
}
Core::System& system;
private:
@@ -408,7 +482,7 @@ private:
const MatchTopologyResult untopological) {
const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation;
for (auto& surface : overlaps) {
Unregister(surface);
Unregister(surface, UnregisterReason::Recycled);
}
switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
case RecycleStrategy::Ignore: {
@@ -467,7 +541,22 @@ private:
ImageCopy(current_surface, new_surface, brick);
}
}
Unregister(current_surface);
if (IsResolutionScannerEnabled()) {
if (IsInRSDatabase(current_surface)) {
if (IsRSBlacklisted(new_surface)) {
UnmarkScanner(current_surface);
} else {
MarkScanner(new_surface);
}
} else if (IsInRSDatabase(new_surface)) {
if (IsRSBlacklisted(current_surface)) {
UnmarkScanner(new_surface);
} else {
MarkScanner(current_surface);
}
}
}
Unregister(current_surface, UnregisterReason::Rebuilt);
Register(new_surface);
new_surface->MarkAsModified(current_surface->IsModified(), Tick());
return {new_surface, new_surface->GetMainView()};
@@ -548,7 +637,7 @@ private:
return {};
}
for (auto surface : overlaps) {
Unregister(surface);
Unregister(surface, UnregisterReason::Restructured);
}
new_surface->MarkAsModified(modified, Tick());
Register(new_surface);
@@ -701,7 +790,30 @@ private:
return {new_surface, new_surface->GetMainView()};
}
void LoadSurface(const TSurface& surface) {
void LoadSurfaceRescaled(TSurface& surface) {
const auto& params = surface->GetSurfaceParams();
enable_resolution_scaling = false;
TSurface proxy = CreateSurface(surface->GetGpuAddr(), params);
enable_resolution_scaling = true;
staging_cache.GetBuffer(0).resize(proxy->GetHostSizeInBytes());
proxy->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
proxy->UploadTexture(staging_cache.GetBuffer(0));
Tegra::Engines::Fermi2D::Config copy_config;
const Common::Rectangle<u32> rect{0, 0, params.width, params.height};
copy_config.operation = Tegra::Engines::Fermi2D::Operation::SrcCopy;
copy_config.filter = Tegra::Engines::Fermi2D::Filter::Linear;
copy_config.src_rect = rect;
copy_config.dst_rect = rect;
TView src_view = proxy->GetMainView();
TView dst_view = surface->GetMainView();
ImageBlit(src_view, dst_view, copy_config);
}
void LoadSurface(TSurface& surface) {
if (surface->IsRescaled()) {
LoadSurfaceRescaled(surface);
return;
}
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
surface->UploadTexture(staging_cache.GetBuffer(0));
@@ -712,6 +824,9 @@ private:
if (!surface->IsModified()) {
return;
}
if (IsResolutionScannerEnabled()) {
UnmarkScanner(surface);
}
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
surface->DownloadTexture(staging_cache.GetBuffer(0));
surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache);
@@ -782,6 +897,115 @@ private:
return {};
}
bool EnabledRescaling() const {
return enable_resolution_scaling;
}
bool IsResolutionScannerEnabled() const {
return Settings::values.use_resolution_scanner;
}
void UnmarkScanner(const TSurface& surface) {
const auto params = surface->GetSurfaceParams();
scaling_database.Unregister(params.pixel_format, params.width, params.height);
}
void MarkScanner(const TSurface& surface) {
const auto params = surface->GetSurfaceParams();
if (params.target != SurfaceTarget::Texture2D || params.num_levels > 1 ||
params.IsCompressed() || params.block_depth > 1) {
return;
}
scaling_database.Register(params.pixel_format, params.width, params.height);
}
bool IsRSBlacklisted(const TSurface& surface) const {
const auto params = surface->GetSurfaceParams();
return scaling_database.IsBlacklisted(params.pixel_format, params.width, params.height);
}
bool IsInRSDatabase(const TSurface& surface) const {
const auto& params = surface->GetSurfaceParams();
return scaling_database.IsInDatabase(params.pixel_format, params.width, params.height);
}
bool CheckBlackListMatch() {
u32 enabled_targets = 0;
u32 black_listed = 0;
bool black_list = false;
for (const auto& target : render_targets) {
if (target.target) {
enabled_targets++;
if (IsRSBlacklisted(target.target)) {
black_list = true;
black_listed++;
}
}
}
if (depth_buffer.target) {
enabled_targets++;
if (IsRSBlacklisted(depth_buffer.target)) {
black_list = true;
black_listed++;
}
}
if (black_list) {
if (black_listed != enabled_targets) {
std::string blacklist_msg{};
for (const auto& target : render_targets) {
if (target.target) {
UnmarkScanner(target.target);
const auto& params = target.target->GetSurfaceParams();
blacklist_msg += fmt::format("Format:{}, Height:{}, Width:{}\n",
static_cast<u32>(params.pixel_format),
params.height, params.width);
}
}
if (depth_buffer.target) {
UnmarkScanner(depth_buffer.target);
const auto& params = depth_buffer.target->GetSurfaceParams();
blacklist_msg += fmt::format("Format:{}, Height:{}, Width:{}\n",
static_cast<u32>(params.pixel_format),
params.height, params.width);
}
LOG_CRITICAL(HW_GPU, "Scan detected a conflict:\n{}\nBlacklisting all",
blacklist_msg);
}
}
return false;
}
bool CheckResolutionScalingEnabled() {
u32 enabled_targets = 0;
u32 rescaled_targets = 0;
bool rescaling = false;
for (const auto& target : render_targets) {
if (target.target) {
enabled_targets++;
if (target.target->IsRescaled()) {
rescaling = true;
rescaled_targets++;
}
}
}
if (depth_buffer.target) {
enabled_targets++;
if (depth_buffer.target->IsRescaled()) {
rescaling = true;
rescaled_targets++;
}
}
if (rescaling) {
if (rescaled_targets != enabled_targets) {
                LOG_CRITICAL(HW_GPU,
                             "Rescaling Database is incorrectly set! Rescan the database!");
return false;
}
return true;
}
return false;
}
constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
return siblings_table[static_cast<std::size_t>(format)];
}
@@ -798,6 +1022,7 @@ private:
// Guards the cache for protection conflicts.
bool guard_render_targets{};
bool guard_samplers{};
bool enable_resolution_scaling{};
// The siblings table is for formats that can inter exchange with one another
// without causing issues. This is only valid when a conflict occurs on a non
@@ -830,6 +1055,8 @@ private:
StagingCache staging_cache;
std::recursive_mutex mutex;
Resolution::ScalingDatabase scaling_database;
};
} // namespace VideoCommon
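
The decision implemented by CheckResolutionScalingEnabled() above is all-or-nothing per draw: every bound colour target and the depth buffer must agree on being rescaled, and a partial match logs the critical message and falls back to native resolution. Reduced to counters, the rule looks like this (a sketch, not part of the patch; u32 as defined in common_types):

bool ShouldRescaleDraw(u32 enabled_targets, u32 rescaled_targets) {
    if (rescaled_targets == 0) {
        return false; // nothing bound is rescaled
    }
    if (rescaled_targets != enabled_targets) {
        return false; // mixed state: the database needs a rescan
    }
    return true; // every bound target is rescaled
}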

View File

@@ -613,6 +613,8 @@ void Config::ReadRendererValues() {
ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool();
Settings::values.use_asynchronous_gpu_emulation =
ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
Settings::values.use_resolution_scanner =
ReadSetting(QStringLiteral("use_resolution_scanner"), false).toBool();
Settings::values.force_30fps_mode =
ReadSetting(QStringLiteral("force_30fps_mode"), false).toBool();
@@ -1026,6 +1028,8 @@ void Config::SaveRendererValues() {
Settings::values.use_accurate_gpu_emulation, false);
WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
Settings::values.use_asynchronous_gpu_emulation, false);
WriteSetting(QStringLiteral("use_resolution_scanner"), Settings::values.use_resolution_scanner,
false);
WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false);
// Cast to double because Qt's written float values are not human-readable

View File

@@ -10,7 +10,7 @@
namespace {
enum class Resolution : int {
Auto,
Scanner,
Scale1x,
Scale2x,
Scale3x,
@@ -19,8 +19,8 @@ enum class Resolution : int {
float ToResolutionFactor(Resolution option) {
switch (option) {
case Resolution::Auto:
return 0.f;
case Resolution::Scanner:
return 1.f;
case Resolution::Scale1x:
return 1.f;
case Resolution::Scale2x:
@@ -30,12 +30,12 @@ float ToResolutionFactor(Resolution option) {
case Resolution::Scale4x:
return 4.f;
}
return 0.f;
return 1.f;
}
Resolution FromResolutionFactor(float factor) {
if (factor == 0.f) {
return Resolution::Auto;
Resolution FromResolutionFactor(float factor, bool scanner_on) {
if (scanner_on) {
return Resolution::Scanner;
} else if (factor == 1.f) {
return Resolution::Scale1x;
} else if (factor == 2.f) {
@@ -45,7 +45,7 @@ Resolution FromResolutionFactor(float factor) {
} else if (factor == 4.f) {
return Resolution::Scale4x;
}
return Resolution::Auto;
return Resolution::Scale1x;
}
} // Anonymous namespace
@@ -62,6 +62,8 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
}
UpdateBackgroundColorButton(new_bg_color);
});
connect(ui->brightness_reset, &QPushButton::pressed, this,
[this] { ui->brightness_slider->setValue(100); });
}
ConfigureGraphics::~ConfigureGraphics() = default;
@@ -69,8 +71,9 @@ ConfigureGraphics::~ConfigureGraphics() = default;
void ConfigureGraphics::SetConfiguration() {
const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
ui->resolution_factor_combobox->setCurrentIndex(
static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
ui->resolution_factor_combobox->setEnabled(runtime_lock);
ui->resolution_factor_combobox->setCurrentIndex(static_cast<int>(FromResolutionFactor(
Settings::values.resolution_factor, Settings::values.use_resolution_scanner)));
ui->use_disk_shader_cache->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
@@ -80,19 +83,22 @@ void ConfigureGraphics::SetConfiguration() {
ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
Settings::values.bg_blue));
ui->brightness_slider->setValue(Settings::values.backlight_brightness * 100 + 50);
}
void ConfigureGraphics::ApplyConfiguration() {
Settings::values.resolution_factor =
ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
const auto resolution = static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex());
Settings::values.resolution_factor = ToResolutionFactor(resolution);
Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
Settings::values.use_asynchronous_gpu_emulation =
ui->use_asynchronous_gpu_emulation->isChecked();
Settings::values.use_resolution_scanner = resolution == Resolution::Scanner;
Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
Settings::values.bg_red = static_cast<float>(bg_color.redF());
Settings::values.bg_green = static_cast<float>(bg_color.greenF());
Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
Settings::values.backlight_brightness = (ui->brightness_slider->value() - 50.0f) / 100.0f;
}
void ConfigureGraphics::changeEvent(QEvent* event) {

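The brightness slider added above uses the range [50, 150] with a default of 100; ApplyConfiguration() and SetConfiguration() convert between the slider position and the backlight_brightness setting with the following inverse pair (a sketch restating the two expressions from the code above):

constexpr float SliderToBrightness(int slider_value) {
    return (slider_value - 50.0f) / 100.0f; // 50 -> 0.0, 100 -> 0.5, 150 -> 1.0
}
constexpr int BrightnessToSlider(float brightness) {
    return static_cast<int>(brightness * 100.0f + 50.0f); // inverse of the above
}
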
View File

@@ -63,27 +63,27 @@
<widget class="QComboBox" name="resolution_factor_combobox">
<item>
<property name="text">
<string>Auto (Window Size)</string>
<string>Profile Scanner (Native)</string>
</property>
</item>
<item>
<property name="text">
<string>Native (1280x720)</string>
<string>Native (1280x720/1920x1080)</string>
</property>
</item>
<item>
<property name="text">
<string>2x Native (2560x1440)</string>
<string>2x Native (2560x1440/3840x2160)</string>
</property>
</item>
<item>
<property name="text">
<string>3x Native (3840x2160)</string>
<string>3x Native (3840x2160/5760x3240)</string>
</property>
</item>
<item>
<property name="text">
<string>4x Native (5120x2880)</string>
<string>4x Native (5120x2880/7680x4320)</string>
</property>
</item>
</widget>
@@ -111,6 +111,68 @@
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout_3">
<item>
<widget class="QLabel" name="label_2">
<property name="text">
<string>Brightness</string>
</property>
</widget>
</item>
<item>
<spacer name="horizontalSpacer">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QSlider" name="brightness_slider">
<property name="minimum">
<number>50</number>
</property>
<property name="maximum">
<number>150</number>
</property>
<property name="singleStep">
<number>10</number>
</property>
<property name="pageStep">
<number>20</number>
</property>
<property name="value">
<number>100</number>
</property>
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="tickPosition">
<enum>QSlider::NoTicks</enum>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="brightness_reset">
<property name="sizePolicy">
<sizepolicy hsizetype="Fixed" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Reset</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
</item>

View File

@@ -66,10 +66,7 @@ std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList()
};
const auto& system = Core::System::GetInstance();
add_threads(system.Scheduler(0).GetThreadList());
add_threads(system.Scheduler(1).GetThreadList());
add_threads(system.Scheduler(2).GetThreadList());
add_threads(system.Scheduler(3).GetThreadList());
add_threads(system.GlobalScheduler().GetThreadList());
return item_list;
}

View File

@@ -471,6 +471,7 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, std::string pat
QAction* open_lfs_location = context_menu.addAction(tr("Open Mod Data Location"));
QAction* open_transferable_shader_cache =
context_menu.addAction(tr("Open Transferable Shader Cache"));
QAction* open_rescaling_profile_cache = context_menu.addAction(tr("Open Rescaling Profile"));
context_menu.addSeparator();
QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS"));
QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard"));
@@ -490,6 +491,8 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, std::string pat
});
connect(open_transferable_shader_cache, &QAction::triggered,
[this, program_id]() { emit OpenTransferableShaderCacheRequested(program_id); });
connect(open_rescaling_profile_cache, &QAction::triggered,
[this, program_id]() { emit OpenResolutionProfileRequested(program_id); });
connect(dump_romfs, &QAction::triggered,
[this, program_id, path]() { emit DumpRomFSRequested(program_id, path); });
connect(copy_tid, &QAction::triggered,

View File

@@ -75,6 +75,7 @@ signals:
void ShouldCancelWorker();
void OpenFolderRequested(u64 program_id, GameListOpenTarget target);
void OpenTransferableShaderCacheRequested(u64 program_id);
void OpenResolutionProfileRequested(u64 program_id);
void DumpRomFSRequested(u64 program_id, const std::string& game_path);
void CopyTIDRequested(u64 program_id);
void NavigateToGamedbEntryRequested(u64 program_id,

View File

@@ -681,6 +681,8 @@ void GMainWindow::ConnectWidgetEvents() {
connect(game_list, &GameList::OpenFolderRequested, this, &GMainWindow::OnGameListOpenFolder);
connect(game_list, &GameList::OpenTransferableShaderCacheRequested, this,
&GMainWindow::OnTransferableShaderCacheOpenFile);
connect(game_list, &GameList::OpenResolutionProfileRequested, this,
&GMainWindow::OnResolutionProfileOpenFile);
connect(game_list, &GameList::DumpRomFSRequested, this, &GMainWindow::OnGameListDumpRomFS);
connect(game_list, &GameList::CopyTIDRequested, this, &GMainWindow::OnGameListCopyTID);
connect(game_list, &GameList::NavigateToGamedbEntryRequested, this,
@@ -1147,6 +1149,23 @@ void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target
QDesktopServices::openUrl(QUrl::fromLocalFile(qpath));
}
void DisplayOrSelect(const QString& folder_path, const QString& file_path) {
    // Windows supports opening a folder while selecting a specified file in Explorer. On every
    // other OS we just open the containing folder without preselecting the file.
#if defined(Q_OS_WIN)
const QString explorer = QStringLiteral("explorer");
QStringList param;
if (!QFileInfo(file_path).isDir()) {
param << QStringLiteral("/select,");
}
param << QDir::toNativeSeparators(file_path);
QProcess::startDetached(explorer, param);
#else
QDesktopServices::openUrl(QUrl::fromLocalFile(folder_path));
#endif
}
void GMainWindow::OnTransferableShaderCacheOpenFile(u64 program_id) {
ASSERT(program_id != 0);
@@ -1164,20 +1183,24 @@ void GMainWindow::OnTransferableShaderCacheOpenFile(u64 program_id) {
return;
}
// Windows supports opening a folder with selecting a specified file in explorer. On every other
// OS we just open the transferable shader cache folder without preselecting the transferable
// shader cache file for the selected game.
#if defined(Q_OS_WIN)
const QString explorer = QStringLiteral("explorer");
QStringList param;
if (!QFileInfo(transferable_shader_cache_file_path).isDir()) {
param << QStringLiteral("/select,");
DisplayOrSelect(tranferable_shader_cache_folder_path, transferable_shader_cache_file_path);
}
void GMainWindow::OnResolutionProfileOpenFile(u64 program_id) {
ASSERT(program_id != 0);
const QString rescaling_dir =
QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::RescalingDir));
const QString rescaling_profile_file_path =
rescaling_dir + QString::fromStdString(fmt::format("{:016X}.json", program_id));
if (!QFile::exists(rescaling_profile_file_path)) {
QMessageBox::warning(this, tr("Error Opening Rescaling Profile"),
tr("A rescaling profile for this title does not exist."));
return;
}
param << QDir::toNativeSeparators(transferable_shader_cache_file_path);
QProcess::startDetached(explorer, param);
#else
QDesktopServices::openUrl(QUrl::fromLocalFile(tranferable_shader_cache_folder_path));
#endif
DisplayOrSelect(rescaling_dir, rescaling_profile_file_path);
}
static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) {

View File

@@ -185,6 +185,7 @@ private slots:
void OnGameListLoadFile(QString game_path);
void OnGameListOpenFolder(u64 program_id, GameListOpenTarget target);
void OnTransferableShaderCacheOpenFile(u64 program_id);
void OnResolutionProfileOpenFile(u64 program_id);
void OnGameListDumpRomFS(u64 program_id, const std::string& game_path);
void OnGameListCopyTID(u64 program_id);
void OnGameListNavigateToGamedbEntry(u64 program_id,