Compare commits

...

4 Commits

Author SHA1 Message Date
Fernando Sahmkow
4771bb88ec GPU: Corrections and feedback to GPU Clock.
The GPU runs at different clock speeds and paces than the CPU, so it 
requires its own timer to keep it in check. Sadly, a cycle timer is 
unfeasible for the GPU, since it's pretty hard to accurately estimate 
how long actions like Draw, DispatchCompute and Clear take (their 
durations vary wildly).
Using the CPU's clock also has its disadvantages:

- If the CPU is idle while the GPU is running, the timer advances 
disproportionately due to how the CPU's idle timing works.

- If the GPU is synced, the timer won't advance until control is given 
back to the CPU.

For all these reasons, it has been decided to use a host timer for the 
GPU.
2019-07-06 10:25:28 -04:00
Fernando Sahmkow
6f0807b91c NvServices: Correct GetGPUTime 2019-07-06 00:16:46 -04:00
Fernando Sahmkow
2b1c89a702 GPU: use the gpu's clock instead of core timing 2019-07-05 23:48:51 -04:00
Fernando Sahmkow
dcc1b8c735 GPU: Implement a host based GPU Clock. 2019-07-05 23:38:34 -04:00
7 changed files with 115 additions and 4 deletions

core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp

@@ -9,6 +9,8 @@
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
#include "video_core/gpu.h"
#include "video_core/gpu_clock.h"
namespace Service::Nvidia::Devices {
@@ -185,7 +187,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
IoctlGetGpuTime params{};
std::memcpy(&params, input.data(), input.size());
const auto ns = Core::Timing::CyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks());
const auto ns = Core::System::GetInstance().GPU().Clock().GetNsTime();
params.gpu_time = static_cast<u64_le>(ns.count());
std::memcpy(output.data(), &params, output.size());
return 0;

video_core/CMakeLists.txt

@@ -22,6 +22,8 @@ add_library(video_core STATIC
gpu.h
gpu_asynch.cpp
gpu_asynch.h
gpu_clock.cpp
gpu_clock.h
gpu_synch.cpp
gpu_synch.h
gpu_thread.cpp

video_core/engines/maxwell_3d.cpp

@@ -9,6 +9,7 @@
#include "core/core_timing.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu_clock.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/textures/texture.h"
@@ -329,8 +330,7 @@ void Maxwell3D::ProcessQueryGet() {
// wait queues.
LongQueryResult query_result{};
query_result.value = result;
// TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
query_result.timestamp = system.CoreTiming().GetTicks();
query_result.timestamp = system.GPU().Clock().GetNsTime().count();
memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
}
dirty_flags.OnMemoryWrite();

video_core/gpu.cpp

@@ -12,6 +12,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
#include "video_core/gpu_clock.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
@@ -31,6 +32,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
auto& rasterizer{renderer.Rasterizer()};
clock = std::make_unique<Tegra::GPUClock>();
memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer);
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
@@ -42,6 +44,14 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
GPU::~GPU() = default;
Tegra::GPUClock& GPU::Clock() {
return *clock;
}
const Tegra::GPUClock& GPU::Clock() const {
return *clock;
}
Engines::Maxwell3D& GPU::Maxwell3D() {
return *maxwell_3d;
}
@@ -285,7 +295,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
block.timestamp = clock->GetNsTime().count();
memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
sizeof(block));
} else {

video_core/gpu.h

@@ -80,6 +80,7 @@ u32 DepthFormatBytesPerPixel(DepthFormat format);
struct CommandListHeader;
class DebugContext;
class GPUClock;
/**
* Struct describing framebuffer configuration
@@ -149,6 +150,12 @@ public:
/// Calls a GPU method.
void CallMethod(const MethodCall& method_call);
/// Returns a reference to the GPU's Clock.
Tegra::GPUClock& Clock();
/// Returns a const reference to the GPU's Clock.
const Tegra::GPUClock& Clock() const;
/// Returns a reference to the Maxwell3D GPU engine.
Engines::Maxwell3D& Maxwell3D();
@@ -248,6 +255,7 @@ protected:
VideoCore::RendererBase& renderer;
private:
std::unique_ptr<GPUClock> clock;
std::unique_ptr<Tegra::MemoryManager> memory_manager;
/// Mapping of command subchannels to their bound engine ids

video_core/gpu_clock.cpp

@@ -0,0 +1,20 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/uint128.h"
#include "video_core/gpu_clock.h"
namespace Tegra {
GPUClock::GPUClock() = default;
GPUClock::~GPUClock() = default;
u64 GPUClock::GetTicks() const {
constexpr u32 ns_per_second = 1000000000U;
const u64 ns = GetNsTime().count();
const u128 middle = Common::Multiply64Into128(ns, gpu_clock);
return Common::Divide128On32(middle, ns_per_second).first;
}
} // namespace Tegra
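
GetTicks converts elapsed host nanoseconds into GPU cycles as ticks = ns * gpu_clock / 1e9. The 128-bit intermediate is what makes this safe: at the 768 MHz profile, the 64-bit product ns * gpu_clock would already wrap after roughly 24 seconds of uptime. Below is a minimal standalone sketch of the same idea, using the compiler-specific unsigned __int128 (GCC/Clang) in place of yuzu's Common::Multiply64Into128 and Common::Divide128On32 helpers:

#include <cstdint>
#include <cstdio>

// Same ns -> ticks conversion as GPUClock::GetTicks, widened so that
// ns * clock_hz cannot wrap around before the divide by 1e9.
static std::uint64_t NsToTicks(std::uint64_t ns, std::uint64_t clock_hz) {
    const unsigned __int128 product = static_cast<unsigned __int128>(ns) * clock_hz;
    return static_cast<std::uint64_t>(product / 1'000'000'000U);
}

int main() {
    // One minute of host time at the 768 MHz profile: the plain 64-bit product
    // (6.0e10 * 7.68e8 ~= 4.6e19) already exceeds UINT64_MAX (~1.8e19), while
    // the widened computation yields the expected 46'080'000'000 ticks.
    const std::uint64_t ns = 60ULL * 1'000'000'000ULL;
    std::printf("%llu\n", static_cast<unsigned long long>(NsToTicks(ns, 768'000'000ULL)));
    return 0;
}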

video_core/gpu_clock.h

@@ -0,0 +1,69 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
/**
 * The GPU runs at different clock speeds and paces than the CPU. It requires
 * its own timer to keep it in check. Sadly, a cycle timer is unfeasible for the
 * GPU, since it's pretty hard to accurately estimate how long actions like Draw,
 * DispatchCompute and Clear take (their durations vary wildly).
 * Using the CPU's clock also has its disadvantages:
 * - If the CPU is idle while the GPU is running, the timer advances
 *   disproportionately due to how the CPU's idle timing works.
 * - If the GPU is synced, the timer won't advance until control is given back to
 *   the CPU.
 * For all these reasons, it has been decided to use a host timer for the GPU.
 **/
#pragma once
#include <chrono>
#include "common/common_types.h"
namespace Tegra {
enum GPUClockProfiles : u64 {
Profile1 = 384000000,
Profile2 = 768000000,
Profile3 = 691200000,
Profile4 = 230400000,
Profile5 = 307000000,
Profile6 = 460800000,
};
class GPUClock {
public:
GPUClock();
~GPUClock();
u64 GetTicks() const;
std::chrono::nanoseconds GetNsTime() const {
const Clock::time_point now = Clock::now();
return std::chrono::duration_cast<std::chrono::nanoseconds>(now - start_walltime);
}
std::chrono::microseconds GetUsTime() const {
const Clock::time_point now = Clock::now();
return std::chrono::duration_cast<std::chrono::microseconds>(now - start_walltime);
}
std::chrono::milliseconds GetMsTime() const {
const Clock::time_point now = Clock::now();
return std::chrono::duration_cast<std::chrono::milliseconds>(now - start_walltime);
}
void SetGPUClock(const u64 new_clock) {
gpu_clock = new_clock;
}
private:
using Clock = std::chrono::system_clock;
Clock::time_point start_walltime = Clock::now();
u64 gpu_clock{GPUClockProfiles::Profile1};
};
} // namespace Tegra
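
For orientation, a hypothetical call site built only from the pieces this change set adds (the Clock() accessor on GPU, SetGPUClock, the profile enum, and the time getters); this exact snippet does not appear in the diff:

// Hypothetical usage sketch, assuming the yuzu tree with this change applied.
auto& clock = Core::System::GetInstance().GPU().Clock();
clock.SetGPUClock(Tegra::GPUClockProfiles::Profile2); // e.g. switch to the 768 MHz profile
const auto ns = clock.GetNsTime();  // host wall-clock time since the clock was constructed
const u64 ticks = clock.GetTicks(); // the same interval expressed in GPU cycles at the current profile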