NvFlinger: Split Buffer Wait from VSync on Async VSync.

This commit splits presentation from waiting. Buffers are now waited for on one thread and presented on a separate thread.
2020-07-02 20:23:58 -04:00
5 changed files with 131 additions and 5 deletions
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -115,11 +115,36 @@ std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::Ac
    return *itr;
 }

-void BufferQueue::ReleaseBuffer(u32 slot) {
+/// Pops slots off presenting_sequence, oldest first, until one of them names a buffer that is
+/// in the Presenting state. Returns that buffer, or an empty optional once the sequence has been
+/// drained without finding a match.
+std::optional<std::reference_wrapper<const BufferQueue::Buffer>>
+BufferQueue::ObtainPresentBuffer() {
+    while (!presenting_sequence.empty()) {
+        const u32 wanted_slot = presenting_sequence.front();
+        // The entry is consumed whether or not a matching buffer turns up.
+        presenting_sequence.pop_front();
+
+        const auto match =
+            std::find_if(queue.begin(), queue.end(), [wanted_slot](const Buffer& candidate) {
+                return candidate.slot == wanted_slot &&
+                       candidate.status == Buffer::Status::Presenting;
+            });
+        if (match != queue.end()) {
+            return *match;
+        }
+    }
+    return {};
+}
+
+void BufferQueue::SetToPresentBuffer(u32 slot) { // moves an Acquired buffer to Presenting
    auto itr = std::find_if(queue.begin(), queue.end(),
                            [&](const Buffer& buffer) { return buffer.slot == slot; });
    ASSERT(itr != queue.end());
    ASSERT(itr->status == Buffer::Status::Acquired);
+    itr->status = Buffer::Status::Presenting;
+    presenting_sequence.push_back(slot); // consumed front-first by ObtainPresentBuffer()
+}
+
+void BufferQueue::ReleaseBuffer(u32 slot) {
+    auto itr = std::find_if(queue.begin(), queue.end(),
+                            [&](const Buffer& buffer) { return buffer.slot == slot; });
+    ASSERT(itr != queue.end());
+    ASSERT(itr->status == Buffer::Status::Presenting || itr->status == Buffer::Status::Acquired);
    itr->status = Buffer::Status::Free;
    free_buffers.push_back(slot);

--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -77,7 +77,7 @@ public:
    };

    struct Buffer {
-        enum class Status { Free = 0, Queued = 1, Dequeued = 2, Acquired = 3 };
+        enum class Status { Free = 0, Queued = 1, Dequeued = 2, Acquired = 3, Presenting = 4 };

        u32 slot;
        Status status = Status::Free;
@@ -96,6 +96,8 @@ public:
                     const Common::Rectangle<int>& crop_rect, u32 swap_interval,
                     Service::Nvidia::MultiFence& multi_fence);
    std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
+    std::optional<std::reference_wrapper<const Buffer>> ObtainPresentBuffer();
+    void SetToPresentBuffer(u32 slot);
    void ReleaseBuffer(u32 slot);
    void Disconnect();
    u32 Query(QueryType type);
@@ -115,6 +117,7 @@ private:
    std::list<u32> free_buffers;
    std::vector<Buffer> queue;
    std::list<u32> queue_sequence;
+    std::list<u32> presenting_sequence;
    Kernel::EventPair buffer_wait_event;
 };

--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -35,6 +35,10 @@ void NVFlinger::VSyncThread(NVFlinger& nv_flinger) {
    nv_flinger.SplitVSync();
 }

+/// Static entry point handed to std::thread for the buffer-wait thread; it simply runs
+/// WaitForBuffers() on the supplied NVFlinger instance.
+void NVFlinger::WaitForBuffersThread(NVFlinger& nv_flinger) {
+    nv_flinger.WaitForBuffers();
+}
+
 void NVFlinger::SplitVSync() {
    system.RegisterHostThread();
    std::string name = "yuzu:VSyncThread";
@@ -43,14 +47,37 @@ void NVFlinger::SplitVSync() {
    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
    s64 delay = 0;
    while (is_running) {
-        guard->lock();
        const s64 time_start = system.CoreTiming().GetGlobalTimeNs().count();
-        Compose();
+        for (auto& display : displays) {
+            // Trigger vsync for this display at the end of drawing
+            SCOPE_EXIT({ display.SignalVSyncEvent(); });
+
+            // Don't do anything for displays without layers.
+            if (!display.HasLayers())
+                continue;
+
+            // TODO(Subv): Support more than 1 layer.
+            VI::Layer& layer = display.GetLayer(0);
+            auto& buffer_queue = layer.GetBufferQueue();
+
+            guard->lock();
+            // Search for a queued buffer and acquire it
+            auto buffer = buffer_queue.ObtainPresentBuffer();
+            guard->unlock();
+
+            if (!buffer) {
+                continue;
+            }
+
+            MicroProfileFlip();
+
+            swap_interval = buffer->get().swap_interval;
+            buffer_queue.ReleaseBuffer(buffer->get().slot);
+        }
        const auto ticks = GetNextTicks();
        const s64 time_end = system.CoreTiming().GetGlobalTimeNs().count();
        const s64 time_passed = time_end - time_start;
        const s64 next_time = std::max<s64>(0, ticks - time_passed - delay);
-        guard->unlock();
        if (next_time > 0) {
            wait_event->WaitFor(std::chrono::nanoseconds{next_time});
        }
@@ -58,6 +85,63 @@ void NVFlinger::SplitVSync() {
    }
 }

+/// Body of the buffer-wait thread. On each pass, for every display that has layers, it acquires
+/// the next queued buffer of layer 0, hands it to nvdisp_disp0 for flipping, waits on the
+/// buffer's fences and then marks the buffer as Presenting so the vsync thread can pick it up.
+/// Between passes it blocks on queue_event, which NotifyQueue() and the destructor signal.
+void NVFlinger::WaitForBuffers() {
+    system.RegisterHostThread();
+    std::string name = "yuzu:WaitBufferQueueThread";
+    MicroProfileOnThreadCreate(name.c_str());
+    Common::SetCurrentThreadName(name.c_str());
+    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+    while (is_running) {
+        for (auto& display : displays) {
+            // Don't do anything for displays without layers.
+            if (!display.HasLayers())
+                continue;
+
+            // TODO(Subv): Support more than 1 layer.
+            VI::Layer& layer = display.GetLayer(0);
+            auto& buffer_queue = layer.GetBufferQueue();
+
+            guard->lock();
+
+            // Search for a queued buffer and acquire it
+            auto buffer = buffer_queue.AcquireBuffer();
+
+            guard->unlock();
+            if (!buffer) {
+                continue;
+            }
+
+            const auto& igbp_buffer = buffer->get().igbp_buffer;
+            // Now send the buffer to the GPU for drawing.
+            // TODO(Subv): Support more than just disp0. The display device selection is probably
+            // based on which display we're drawing (Default, Internal, External, etc)
+            auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");
+            ASSERT(nvdisp);
+
+            // NOTE(review): the flip is issued before the fences below are waited on; confirm
+            // this ordering is intended.
+            nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format,
+                         igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
+                         buffer->get().transform, buffer->get().crop_rect);
+
+            auto& gpu = system.GPU();
+            const auto& multi_fence = buffer->get().multi_fence;
+
+            // The fence wait is done without holding `guard`.
+            for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
+                const auto& fence = multi_fence.fences[fence_id];
+                gpu.WaitFence(fence.id, fence.value);
+            }
+
+            // The vsync thread pops presenting_sequence while holding `guard` (see SplitVSync),
+            // so the push performed by SetToPresentBuffer has to hold the same lock.
+            guard->lock();
+            buffer_queue.SetToPresentBuffer(buffer->get().slot);
+            guard->unlock();
+        }
+        queue_event->Wait();
+    }
+}
+
+/// Wakes the buffer-wait thread after a buffer has been queued.
+/// queue_event is only created when the system runs in multicore mode, so this is a no-op
+/// otherwise instead of dereferencing an empty unique_ptr.
+void NVFlinger::NotifyQueue() {
+    if (queue_event) {
+        queue_event->Set();
+    }
+}
+
 NVFlinger::NVFlinger(Core::System& system) : system(system) {
    displays.emplace_back(0, "Default", system);
    displays.emplace_back(1, "External", system);
@@ -78,7 +162,9 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {
    if (system.IsMulticore()) {
        is_running = true;
        wait_event = std::make_unique<Common::Event>();
+        queue_event = std::make_unique<Common::Event>();
        vsync_thread = std::make_unique<std::thread>(VSyncThread, std::ref(*this));
+        buffer_thread = std::make_unique<std::thread>(WaitForBuffersThread, std::ref(*this));
    } else {
        system.CoreTiming().ScheduleEvent(frame_ticks, composition_event);
    }
@@ -88,9 +174,13 @@ NVFlinger::~NVFlinger() {
    if (system.IsMulticore()) {
        is_running = false;
        wait_event->Set();
+        queue_event->Set();
        vsync_thread->join();
+        buffer_thread->join();
        vsync_thread.reset();
+        buffer_thread.reset();
        wait_event.reset();
+        queue_event.reset();
    } else {
        system.CoreTiming().UnscheduleEvent(composition_event, 0);
    }
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -80,6 +80,9 @@ public:
    /// Obtains a buffer queue identified by the ID.
    const BufferQueue& FindBufferQueue(u32 id) const;

+    /// On queueing buffer for rendering
+    void NotifyQueue();
+
    /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
    /// finished.
    void Compose();
@@ -104,8 +107,10 @@ private:
    const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const;

    static void VSyncThread(NVFlinger& nv_flinger);
+    static void WaitForBuffersThread(NVFlinger& nv_flinger);

    void SplitVSync();
+    void WaitForBuffers();

    std::shared_ptr<Nvidia::Module> nvdrv;

@@ -128,7 +133,9 @@ private:
    Core::System& system;

    std::unique_ptr<std::thread> vsync_thread;
+    std::unique_ptr<std::thread> buffer_thread;
    std::unique_ptr<Common::Event> wait_event;
+    std::unique_ptr<Common::Event> queue_event;
    std::atomic<bool> is_running{};
 };

--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -580,6 +580,7 @@ private:
            buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
                                     request.data.GetCropRect(), request.data.swap_interval,
                                     request.data.multi_fence);
+            nv_flinger->NotifyQueue();

            IGBPQueueBufferResponseParcel response{1280, 720};
            ctx.WriteBuffer(response.Serialize());