"Merge Tagged PR 6598"

"Merge Tagged PR 7346"
"Merge Tagged PR 8240"
2022-07-06 12:02:07 +00:00 · 2022-07-06 12:02:07 +00:00 · 2022-07-06 12:02:05 +00:00 · 2022-07-06 12:02:05 +00:00 · 2022-07-06 12:02:04 +00:00 · 2022-07-06 12:02:04 +00:00
19 changed files with 196 additions and 96 deletions
--- a/.ci/templates/build-msvc.yml
+++ b/.ci/templates/build-msvc.yml
@@ -8,7 +8,7 @@ steps:
  displayName: 'Install vulkan-sdk'
 - script: python -m pip install --upgrade pip conan
  displayName: 'Install conan'
- script: refreshenv && mkdir build && cd build && cmake -G "Visual Studio 17 2022" -A x64 -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DYUZU_TESTS=OFF -DUSE_DISCORD_PRESENCE=ON -DENABLE_QT_TRANSLATION=ON -DDISPLAY_VERSION=${{ parameters['version'] }} -DCMAKE_BUILD_TYPE=Release .. && cd ..
+- script: refreshenv && mkdir build && cd build && cmake -G "Visual Studio 16 2019" -A x64 -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DYUZU_TESTS=OFF -DUSE_DISCORD_PRESENCE=ON -DENABLE_QT_TRANSLATION=ON -DDISPLAY_VERSION=${{ parameters['version'] }} -DCMAKE_BUILD_TYPE=Release .. && cd ..
  displayName: 'Configure CMake'
 - task: MSBuild@1
  displayName: 'Build'
--- a/.ci/yuzu-mainline-step2.yml
+++ b/.ci/yuzu-mainline-step2.yml
@@ -47,7 +47,7 @@ stages:
    timeoutInMinutes: 120
    displayName: 'msvc'
    pool:
-      vmImage: windows-2022
+      vmImage: windows-2019
    steps:
    - template: ./templates/sync-source.yml
      parameters:
--- a/.ci/yuzu-patreon-step2.yml
+++ b/.ci/yuzu-patreon-step2.yml
@@ -12,7 +12,7 @@ stages:
    timeoutInMinutes: 120
    displayName: 'windows-msvc'
    pool:
-      vmImage: windows-2022
+      vmImage: windows-2019
    steps:
    - template: ./templates/sync-source.yml
      parameters:
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -86,6 +86,16 @@ static void VolumeAdjustSamples(std::vector<s16>& samples, float game_volume) {
 }

 void Stream::PlayNextBuffer(std::chrono::nanoseconds ns_late) {
+#ifndef _WIN32
+    auto now = std::chrono::steady_clock::now();
+    auto duration = now.time_since_epoch();
+    auto nanoseconds = std::chrono::duration_cast<std::chrono::nanoseconds>(duration);
+
+    if (nanoseconds > expected_cb_time) {
+        ns_late = nanoseconds - expected_cb_time;
+    }
+#endif
+
    if (!IsPlaying()) {
        // Ensure we are in playing state before playing the next buffer
        sink_stream.Flush();
@@ -120,6 +130,9 @@ void Stream::PlayNextBuffer(std::chrono::nanoseconds ns_late) {
        ns_late = {};
    }

+#ifndef _WIN32
+    expected_cb_time = nanoseconds + (buffer_release_ns - ns_late);
+#endif
    core_timing.ScheduleEvent(buffer_release_ns - ns_late, release_event, {});
 }

--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -123,6 +123,9 @@ private:
    SinkStream& sink_stream;                ///< Output sink for the stream
    Core::Timing::CoreTiming& core_timing;  ///< Core timing instance.
    std::string name;                       ///< Name of the stream, must be unique
+#ifndef _WIN32
+    std::chrono::nanoseconds expected_cb_time = {}; ///< Estimated time of next callback
+#endif
 };

 using StreamPtr = std::shared_ptr<Stream>;
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -125,7 +125,9 @@ public:
    }

    void AddTicks(u64 ticks) override {
-        ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
+        if (parent.uses_wall_clock) {
+            return;
+        }

        // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
        // rough approximation of the amount of executed ticks in the system, it may be thrown off
@@ -142,7 +144,12 @@ public:
    }

    u64 GetTicksRemaining() override {
-        ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
+        if (parent.uses_wall_clock) {
+            if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
+                return minimum_run_cycles;
+            }
+            return 0U;
+        }

        return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
    }
@@ -172,7 +179,7 @@ public:
    Core::Memory::Memory& memory;
    std::size_t num_interpreted_instructions{};
    bool debugger_enabled{};
-    static constexpr u64 minimum_run_cycles = 1000U;
+    static constexpr u64 minimum_run_cycles = 10000U;
 };

 std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* page_table) const {
@@ -200,7 +207,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*

    // Timing
    config.wall_clock_cntpct = uses_wall_clock;
-    config.enable_cycle_counting = !uses_wall_clock;
+    config.enable_cycle_counting = true;

    // Code cache size
    config.code_cache_size = 512_MiB;
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -166,7 +166,9 @@ public:
    }

    void AddTicks(u64 ticks) override {
-        ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
+        if (parent.uses_wall_clock) {
+            return;
+        }

        // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
        // rough approximation of the amount of executed ticks in the system, it may be thrown off
@@ -181,7 +183,12 @@ public:
    }

    u64 GetTicksRemaining() override {
-        ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
+        if (parent.uses_wall_clock) {
+            if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
+                return minimum_run_cycles;
+            }
+            return 0U;
+        }

        return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
    }
@@ -216,7 +223,7 @@ public:
    u64 tpidrro_el0 = 0;
    u64 tpidr_el0 = 0;
    bool debugger_enabled{};
-    static constexpr u64 minimum_run_cycles = 1000U;
+    static constexpr u64 minimum_run_cycles = 10000U;
 };

 std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* page_table,
@@ -260,7 +267,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*

    // Timing
    config.wall_clock_cntpct = uses_wall_clock;
-    config.enable_cycle_counting = !uses_wall_clock;
+    config.enable_cycle_counting = true;

    // Code cache size
    config.code_cache_size = 512_MiB;
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -138,6 +138,7 @@ struct System::Impl {

        kernel.Suspend(false);
        core_timing.SyncPause(false);
+        cpu_manager.Pause(false);
        is_paused = false;

        return status;
@@ -149,6 +150,7 @@ struct System::Impl {

        core_timing.SyncPause(true);
        kernel.Suspend(true);
+        cpu_manager.Pause(true);
        is_paused = true;

        return status;
@@ -158,6 +160,7 @@ struct System::Impl {
        std::unique_lock<std::mutex> lk(suspend_guard);
        kernel.Suspend(true);
        core_timing.SyncPause(true);
+        cpu_manager.Pause(true);
        return lk;
    }

@@ -165,6 +168,7 @@ struct System::Impl {
        if (!is_paused) {
            core_timing.SyncPause(false);
            kernel.Suspend(false);
+            cpu_manager.Pause(false);
        }
    }

@@ -330,8 +334,6 @@ struct System::Impl {
            gpu_core->NotifyShutdown();
        }

-        kernel.ShutdownCores();
-        cpu_manager.Shutdown();
        debugger.reset();
        services.reset();
        service_manager.reset();
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -61,12 +61,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
    const auto empty_timed_callback = [](std::uintptr_t, std::chrono::nanoseconds) {};
    ev_lost = CreateEvent("_lost_event", empty_timed_callback);
    if (is_multicore) {
-        const auto hardware_concurrency = std::thread::hardware_concurrency();
-        size_t id = 0;
-        worker_threads.emplace_back(ThreadEntry, std::ref(*this), id++);
-        if (hardware_concurrency > 8) {
-            worker_threads.emplace_back(ThreadEntry, std::ref(*this), id++);
-        }
+        worker_threads.emplace_back(ThreadEntry, std::ref(*this), 0);
    }
 }

@@ -228,14 +223,11 @@ std::optional<s64> CoreTiming::Advance() {
        event_queue.pop_back();

        if (const auto event_type{evt.type.lock()}) {
-            sequence_mutex.lock();
+
            event_mutex.unlock();

-            event_type->guard.lock();
-            sequence_mutex.unlock();
            const s64 delay = static_cast<s64>(GetGlobalTimeNs().count() - evt.time);
            event_type->callback(evt.user_data, std::chrono::nanoseconds{delay});
-            event_type->guard.unlock();

            event_mutex.lock();
            pending_events.fetch_sub(1, std::memory_order_relaxed);
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -32,7 +32,6 @@ struct EventType {
    TimedCallback callback;
    /// A pointer to the name of the event.
    const std::string name;
-    mutable std::mutex guard;
 };

 /**
@@ -157,7 +156,6 @@ private:
    std::condition_variable wait_pause_cv;
    std::condition_variable wait_signal_cv;
    mutable std::mutex event_mutex;
-    mutable std::mutex sequence_mutex;

    std::atomic<bool> paused_state{};
    bool is_paused{};
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -25,8 +25,10 @@ void CpuManager::ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager
 }

 void CpuManager::Initialize() {
+    running_mode = true;
    num_cores = is_multicore ? Core::Hardware::NUM_CPU_CORES : 1;
    gpu_barrier = std::make_unique<Common::Barrier>(num_cores + 1);
+    pause_barrier = std::make_unique<Common::Barrier>(num_cores + 1);

    for (std::size_t core = 0; core < num_cores; core++) {
        core_data[core].host_thread = std::jthread(ThreadStart, std::ref(*this), core);
@@ -34,11 +36,8 @@ void CpuManager::Initialize() {
 }

 void CpuManager::Shutdown() {
-    for (std::size_t core = 0; core < num_cores; core++) {
-        if (core_data[core].host_thread.joinable()) {
-            core_data[core].host_thread.join();
-        }
-    }
+    running_mode = false;
+    Pause(false);
 }

 void CpuManager::GuestThreadFunction() {
@@ -65,10 +64,6 @@ void CpuManager::IdleThreadFunction() {
    }
 }

-void CpuManager::ShutdownThreadFunction() {
-    ShutdownThread();
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 ///                             MultiCore                                   ///
 ///////////////////////////////////////////////////////////////////////////////
@@ -181,13 +176,41 @@ void CpuManager::PreemptSingleCore(bool from_running_enviroment) {
    }
 }

-void CpuManager::ShutdownThread() {
+void CpuManager::SuspendThread() { // Start function of the per-core suspend threads
    auto& kernel = system.Kernel();
-    auto core = is_multicore ? kernel.CurrentPhysicalCoreIndex() : 0;
-    auto* current_thread = kernel.GetCurrentEmuThread();
+    kernel.CurrentScheduler()->OnThreadStart();

-    Common::Fiber::YieldTo(current_thread->GetHostContext(), *core_data[core].host_context);
-    UNREACHABLE();
+    while (true) { // Never returns: each pass yields to the host, then reschedules
+        auto core = is_multicore ? kernel.CurrentPhysicalCoreIndex() : 0;
+        auto& scheduler = *kernel.CurrentScheduler();
+        Kernel::KThread* current_thread = scheduler.GetSchedulerCurrentThread();
+        Common::Fiber::YieldTo(current_thread->GetHostContext(), *core_data[core].host_context);
+
+        // This shouldn't be here. This is here because the scheduler needs the current
+        // thread to have dispatch disabled before explicitly rescheduling. Ideally in the
+        // future this will be called by RequestScheduleOnInterrupt and explicitly disabling
+        // dispatch outside the scheduler will not be necessary.
+        current_thread->DisableDispatch();
+
+        scheduler.RescheduleCurrentCore();
+    }
+}
+
+void CpuManager::Pause(bool paused) { // Sets the pause state observed by RunThread
+    std::scoped_lock lk{pause_lock}; // Serializes concurrent Pause() callers
+
+    if (pause_state == paused) {
+        return; // Already in the requested state, so no barrier handshake is performed
+    }
+
+    // Publish the new state; RunThread reads it at the top of its loop
+    pause_state.store(paused);
+
+    // Wake up any threads blocked in pause_state.wait()
+    pause_state.notify_all();
+
+    // Rendezvous on pause_barrier with the core threads before returning
+    pause_barrier->Sync();
 }

 void CpuManager::RunThread(std::size_t core) {
@@ -218,9 +241,27 @@ void CpuManager::RunThread(std::size_t core) {
        system.GPU().ObtainContext();
    }

-    auto* current_thread = system.Kernel().CurrentScheduler()->GetIdleThread();
-    Kernel::SetCurrentThread(system.Kernel(), current_thread);
-    Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext());
+    {
+        // Set the current thread on entry
+        auto* current_thread = system.Kernel().CurrentScheduler()->GetIdleThread();
+        Kernel::SetCurrentThread(system.Kernel(), current_thread);
+    }
+
+    while (running_mode) {
+        if (pause_state.load(std::memory_order_relaxed)) {
+            // Wait for caller to acknowledge pausing
+            pause_barrier->Sync();
+
+            // Wait until unpaused
+            pause_state.wait(true, std::memory_order_relaxed);
+
+            // Wait for caller to acknowledge unpausing
+            pause_barrier->Sync();
+        }
+
+        auto current_thread = system.Kernel().CurrentScheduler()->GetSchedulerCurrentThread();
+        Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext());
+    }
 }

 } // namespace Core
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -50,14 +50,16 @@ public:
    void Initialize();
    void Shutdown();

+    void Pause(bool paused);
+
    std::function<void()> GetGuestThreadStartFunc() {
        return [this] { GuestThreadFunction(); };
    }
    std::function<void()> GetIdleThreadStartFunc() {
        return [this] { IdleThreadFunction(); };
    }
-    std::function<void()> GetShutdownThreadStartFunc() {
-        return [this] { ShutdownThreadFunction(); };
+    std::function<void()> GetSuspendThreadStartFunc() {
+        return [this] { SuspendThread(); };
    }

    void PreemptSingleCore(bool from_running_enviroment = true);
@@ -70,7 +72,6 @@ private:
    void GuestThreadFunction();
    void GuestRewindFunction();
    void IdleThreadFunction();
-    void ShutdownThreadFunction();

    void MultiCoreRunGuestThread();
    void MultiCoreRunGuestLoop();
@@ -82,7 +83,7 @@ private:

    static void ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager, std::size_t core);

-    void ShutdownThread();
+    void SuspendThread();
    void RunThread(std::size_t core);

    struct CoreData {
@@ -90,7 +91,12 @@ private:
        std::jthread host_thread;
    };

+    std::atomic<bool> running_mode{};
+    std::atomic<bool> pause_state{};
+    std::unique_ptr<Common::Barrier> pause_barrier{};
    std::unique_ptr<Common::Barrier> gpu_barrier{};
+    std::mutex pause_lock{};
+
    std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{};

    bool is_async_gpu{};
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -269,7 +269,7 @@ Result KThread::InitializeIdleThread(Core::System& system, KThread* thread, s32
 Result KThread::InitializeHighPriorityThread(Core::System& system, KThread* thread,
                                             KThreadFunction func, uintptr_t arg, s32 virt_core) {
    return InitializeThread(thread, func, arg, {}, {}, virt_core, nullptr, ThreadType::HighPriority,
-                            system.GetCpuManager().GetShutdownThreadStartFunc());
+                            system.GetCpuManager().GetSuspendThreadStartFunc());
 }

 Result KThread::InitializeUserThread(Core::System& system, KThread* thread, KThreadFunction func,
@@ -741,19 +741,6 @@ void KThread::Continue() {
    KScheduler::OnThreadStateChanged(kernel, this, old_state);
 }

-void KThread::WaitUntilSuspended() {
-    // Make sure we have a suspend requested.
-    ASSERT(IsSuspendRequested());
-
-    // Loop until the thread is not executing on any core.
-    for (std::size_t i = 0; i < static_cast<std::size_t>(Core::Hardware::NUM_CPU_CORES); ++i) {
-        KThread* core_thread{};
-        do {
-            core_thread = kernel.Scheduler(i).GetSchedulerCurrentThread();
-        } while (core_thread == this);
-    }
-}
-
 Result KThread::SetActivity(Svc::ThreadActivity activity) {
    // Lock ourselves.
    KScopedLightLock lk(activity_pause_lock);
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -208,8 +208,6 @@ public:

    void Continue();

-    void WaitUntilSuspended();
-
    constexpr void SetSyncedIndex(s32 index) {
        synced_index = index;
    }
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -76,7 +76,7 @@ struct KernelCore::Impl {
        InitializeMemoryLayout();
        Init::InitializeKPageBufferSlabHeap(system);
        InitializeSchedulers();
-        InitializeShutdownThreads();
+        InitializeSuspendThreads();
        InitializePreemption(kernel);

        RegisterHostThread();
@@ -143,9 +143,9 @@ struct KernelCore::Impl {
        CleanupObject(system_resource_limit);

        for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
-            if (shutdown_threads[core_id]) {
-                shutdown_threads[core_id]->Close();
-                shutdown_threads[core_id] = nullptr;
+            if (suspend_threads[core_id]) {
+                suspend_threads[core_id]->Close();
+                suspend_threads[core_id] = nullptr;
            }

            schedulers[core_id]->Finalize();
@@ -247,13 +247,13 @@ struct KernelCore::Impl {
        system.CoreTiming().ScheduleEvent(time_interval, preemption_event);
    }

-    void InitializeShutdownThreads() {
+    void InitializeSuspendThreads() {
        for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
-            shutdown_threads[core_id] = KThread::Create(system.Kernel());
-            ASSERT(KThread::InitializeHighPriorityThread(system, shutdown_threads[core_id], {}, {},
+            suspend_threads[core_id] = KThread::Create(system.Kernel());
+            ASSERT(KThread::InitializeHighPriorityThread(system, suspend_threads[core_id], {}, {},
                                                         core_id)
                       .IsSuccess());
-            shutdown_threads[core_id]->SetName(fmt::format("SuspendThread:{}", core_id));
+            suspend_threads[core_id]->SetName(fmt::format("SuspendThread:{}", core_id));
        }
    }

@@ -775,7 +775,7 @@ struct KernelCore::Impl {
    std::weak_ptr<ServiceThread> default_service_thread;
    Common::ThreadWorker service_threads_manager;

-    std::array<KThread*, Core::Hardware::NUM_CPU_CORES> shutdown_threads;
+    std::array<KThread*, Core::Hardware::NUM_CPU_CORES> suspend_threads;
    std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{};
    std::array<std::unique_ptr<Kernel::KScheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{};

@@ -1085,27 +1085,16 @@ const Kernel::KSharedMemory& KernelCore::GetHidBusSharedMem() const {

 void KernelCore::Suspend(bool suspended) {
    const bool should_suspend{exception_exited || suspended};
-    const auto activity = should_suspend ? ProcessActivity::Paused : ProcessActivity::Runnable;
-
-    for (auto* process : GetProcessList()) {
-        process->SetActivity(activity);
-
-        if (should_suspend) {
-            // Wait for execution to stop
-            for (auto* thread : process->GetThreadList()) {
-                thread->WaitUntilSuspended();
-            }
+    const auto state{should_suspend ? ThreadState::Runnable : ThreadState::Waiting}; // Runnable = suspend, Waiting = resume
+    {
+        KScopedSchedulerLock lk{*this};
+        for (auto* thread : impl->suspend_threads) { // One suspend thread per emulated core
+            thread->SetState(state);
+            thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Suspended);
        }
    }
 }

-void KernelCore::ShutdownCores() {
-    for (auto* thread : impl->shutdown_threads) {
-        void(thread->Run());
-    }
-    InterruptAllPhysicalCores();
-}
-
 bool KernelCore::IsMulticore() const {
    return impl->is_multicore;
 }
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -280,9 +280,6 @@ public:
    /// Exceptional exit all processes.
    void ExceptionalExit();

-    /// Notify emulated CPU cores to shut down.
-    void ShutdownCores();
-
    bool IsMulticore() const;

    bool IsShuttingDown() const;
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -31,8 +31,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
    VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();

    while (!stop_token.stop_requested()) {
-        CommandDataContainer next;
-        state.queue.Pop(next, stop_token);
+        CommandDataContainer next = state.queue.PopWait(stop_token);
        if (stop_token.stop_requested()) {
            break;
        }
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -10,7 +10,7 @@
 #include <thread>
 #include <variant>

-#include "common/bounded_threadsafe_queue.h"
+#include "common/threadsafe_queue.h"
 #include "video_core/framebuffer_config.h"

 namespace Tegra {
@@ -96,7 +96,7 @@ struct CommandDataContainer {

 /// Struct used to synchronize the GPU thread
 struct SynchState final {
-    using CommandQueue = Common::MPSCQueue<CommandDataContainer>;
+    using CommandQueue = Common::MPSCQueue<CommandDataContainer, true>;
    std::mutex write_lock;
    CommandQueue queue;
    u64 last_fence{};
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -3,6 +3,8 @@

 #include <array>
 #include <vector>
+#include "common/scope_exit.h"
+#include "video_core/dirty_flags.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/macro/macro.h"
 #include "video_core/macro/macro_hle.h"
@@ -58,6 +60,7 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
    maxwell3d.regs.index_array.first = parameters[3];
    maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base?
    maxwell3d.regs.index_array.count = parameters[1];
+    maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
    maxwell3d.regs.vb_element_base = element_base;
    maxwell3d.regs.vb_base_instance = base_instance;
    maxwell3d.mme_draw.instance_count = instance_count;
@@ -80,10 +83,67 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
 }

-constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
+// Multi-draw indirect: issues one indexed draw per record packed in the macro parameters.
+void HLE_3f5e74b9c9a50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
+    SCOPE_EXIT({
+        // Reset every draw register and MME draw field written by the loop below.
+        maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
+        maxwell3d.regs.index_array.count = 0;
+        maxwell3d.regs.vb_element_base = 0x0;
+        maxwell3d.regs.vb_base_instance = 0x0;
+        maxwell3d.mme_draw.instance_count = 0;
+        maxwell3d.CallMethodFromMME(0x8e3, 0x640);
+        maxwell3d.CallMethodFromMME(0x8e4, 0x0);
+        maxwell3d.CallMethodFromMME(0x8e5, 0x0);
+        maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+        maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+    });
+    const u32 start_indirect = parameters[0]; // Index of the first draw record to issue
+    const u32 end_indirect = parameters[1]; // One past the last draw record to issue
+    if (start_indirect >= end_indirect) {
+        // Empty or inverted range: no draw is issued.
+        return;
+    }
+    const auto topology =
+        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
+    maxwell3d.regs.draw.topology.Assign(topology);
+    const u32 padding = parameters[3]; // Extra words that follow each 5-word draw record
+    const std::size_t max_draws = parameters[4]; // Upper bound on the number of draws issued
+
+    const u32 indirect_words = 5 + padding; // Stride of one draw record, in words
+    const std::size_t first_draw = start_indirect;
+    const std::size_t effective_draws = end_indirect - start_indirect;
+    const std::size_t last_draw = start_indirect + std::min(effective_draws, max_draws);
+
+    for (std::size_t index = first_draw; index < last_draw; index++) {
+        const std::size_t base = index * indirect_words + 5; // Records follow the 5 header words
+        const u32 num_vertices = parameters[base]; // NOTE(review): parameters.size() is never checked against base + 4 — confirm callers guarantee it
+        const u32 instance_count = parameters[base + 1];
+        const u32 first_index = parameters[base + 2];
+        const u32 base_vertex = parameters[base + 3];
+        const u32 base_instance = parameters[base + 4];
+        maxwell3d.regs.index_array.first = first_index;
+        maxwell3d.regs.reg_array[0x446] = base_vertex;
+        maxwell3d.regs.index_array.count = num_vertices;
+        maxwell3d.regs.vb_element_base = base_vertex;
+        maxwell3d.regs.vb_base_instance = base_instance;
+        maxwell3d.mme_draw.instance_count = instance_count;
+        maxwell3d.CallMethodFromMME(0x8e3, 0x640);
+        maxwell3d.CallMethodFromMME(0x8e4, base_vertex);
+        maxwell3d.CallMethodFromMME(0x8e5, base_instance);
+        maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+        if (maxwell3d.ShouldExecute()) {
+            maxwell3d.Rasterizer().Draw(true, true); // presumably (is_indexed, is_instanced) — TODO confirm
+        }
+        maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+    }
+}
+
+constexpr std::array<std::pair<u64, HLEFunction>, 4> hle_funcs{{
    {0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
    {0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD},
    {0x0217920100488FF7, &HLE_0217920100488FF7},
+    {0x3f5e74b9c9a50164, &HLE_3f5e74b9c9a50164},
 }};

 class HLEMacroImpl final : public CachedMacro {
@@ -99,6 +159,7 @@ private:
    Engines::Maxwell3D& maxwell3d;
    HLEFunction func;
 };
+
 } // Anonymous namespace

 HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
Author	SHA1	Message	Date
yuzubot	1a4d4f2057	"Merge Tagged PR 6598"	2022-07-06 12:02:07 +00:00
yuzubot	7ced4d4b2d	"Merge Tagged PR 7346"	2022-07-06 12:02:07 +00:00
yuzubot	4a3d1794ba	"Merge Tagged PR 8240"	2022-07-06 12:02:05 +00:00
yuzubot	492f5c10c2	"Merge Tagged PR 8349"	2022-07-06 12:02:05 +00:00
yuzubot	55bc86a0ef	"Merge Tagged PR 8531"	2022-07-06 12:02:04 +00:00
yuzubot	4daa552d9c	"Merge Tagged PR 8538"	2022-07-06 12:02:04 +00:00
yuzubot	4d34fceef1	"Merge Tagged PR 8542"	2022-07-06 12:02:04 +00:00