Compare commits

...

4 Commits

Author SHA1 Message Date
Fernando Sahmkow
4771bb88ec GPU: Corrections and feedback to GPU Clock.
The GPU runs at different clock speeds and paces than the CPU, so it 
requires its own timer to keep it in check. Sadly, a cycle timer is 
unfeasible for the GPU, since it's pretty hard to accurately estimate 
how long actions like Draw, DispatchCompute and Clear take (their 
durations vary wildly).
Using the CPU's clock also has its disadvantages:

- If the CPU is idle while the GPU is running, the timer advances 
disproportionately due to how the CPU's idle timing works.

- If the GPU is synced, the timer won't advance until control is given 
back to the CPU.

For all these reasons, it has been decided to use a host timer for the 
GPU.
2019-07-06 10:25:28 -04:00
Fernando Sahmkow
6f0807b91c NvServices: Correct GetGPUTime 2019-07-06 00:16:46 -04:00
Fernando Sahmkow
2b1c89a702 GPU: use the gpu's clock instead of core timing 2019-07-05 23:48:51 -04:00
Fernando Sahmkow
dcc1b8c735 GPU: Implement a host based GPU Clock. 2019-07-05 23:38:34 -04:00
7 changed files with 115 additions and 4 deletions

core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp

@@ -9,6 +9,8 @@
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
#include "video_core/gpu.h"
#include "video_core/gpu_clock.h"
namespace Service::Nvidia::Devices {
@@ -185,7 +187,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
IoctlGetGpuTime params{};
std::memcpy(&params, input.data(), input.size());
const auto ns = Core::Timing::CyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks());
const auto ns = Core::System::GetInstance().GPU().Clock().GetNsTime();
params.gpu_time = static_cast<u64_le>(ns.count());
std::memcpy(output.data(), &params, output.size());
return 0;

video_core/CMakeLists.txt

@@ -22,6 +22,8 @@ add_library(video_core STATIC
gpu.h
gpu_asynch.cpp
gpu_asynch.h
gpu_clock.cpp
gpu_clock.h
gpu_synch.cpp
gpu_synch.h
gpu_thread.cpp

video_core/engines/maxwell_3d.cpp

@@ -9,6 +9,7 @@
#include "core/core_timing.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu_clock.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/textures/texture.h"
@@ -329,8 +330,7 @@ void Maxwell3D::ProcessQueryGet() {
// wait queues.
LongQueryResult query_result{};
query_result.value = result;
// TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
query_result.timestamp = system.CoreTiming().GetTicks();
query_result.timestamp = system.GPU().Clock().GetNsTime().count();
memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
}
dirty_flags.OnMemoryWrite();

video_core/gpu.cpp

@@ -12,6 +12,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
#include "video_core/gpu_clock.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
@@ -31,6 +32,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
auto& rasterizer{renderer.Rasterizer()};
clock = std::make_unique<Tegra::GPUClock>();
memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer);
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
@@ -42,6 +44,14 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
GPU::~GPU() = default;
Tegra::GPUClock& GPU::Clock() {
return *clock;
}
const Tegra::GPUClock& GPU::Clock() const {
return *clock;
}
Engines::Maxwell3D& GPU::Maxwell3D() {
return *maxwell_3d;
}
@@ -285,7 +295,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
block.timestamp = clock->GetNsTime().count();
memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
sizeof(block));
} else {

video_core/gpu.h

@@ -80,6 +80,7 @@ u32 DepthFormatBytesPerPixel(DepthFormat format);
struct CommandListHeader;
class DebugContext;
class GPUClock;
/**
* Struct describing framebuffer configuration
@@ -149,6 +150,12 @@ public:
/// Calls a GPU method.
void CallMethod(const MethodCall& method_call);
/// Returns a reference to the GPU's Clock.
Tegra::GPUClock& Clock();
/// Returns a const reference to the GPU's Clock.
const Tegra::GPUClock& Clock() const;
/// Returns a reference to the Maxwell3D GPU engine.
Engines::Maxwell3D& Maxwell3D();
@@ -248,6 +255,7 @@ protected:
VideoCore::RendererBase& renderer;
private:
std::unique_ptr<GPUClock> clock;
std::unique_ptr<Tegra::MemoryManager> memory_manager;
/// Mapping of command subchannels to their bound engine ids

video_core/gpu_clock.cpp

@@ -0,0 +1,20 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/uint128.h"
#include "video_core/gpu_clock.h"
namespace Tegra {
GPUClock::GPUClock() = default;
GPUClock::~GPUClock() = default;
u64 GPUClock::GetTicks() const {
constexpr u32 ns_per_second = 1000000000U;
const u64 ns = GetNsTime().count();
const u128 middle = Common::Multiply64Into128(ns, gpu_clock);
return Common::Divide128On32(middle, ns_per_second).first;
}
} // namespace Tegra
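
GetTicks converts elapsed host nanoseconds into GPU cycles as ticks = ns * gpu_clock / 1e9. The 128-bit intermediate is what makes this safe: at the 768 MHz profile, the 64-bit product ns * gpu_clock would already wrap after roughly 24 seconds of uptime. Below is a minimal standalone sketch of the same idea, using the compiler-specific unsigned __int128 (GCC/Clang) in place of yuzu's Common::Multiply64Into128 and Common::Divide128On32 helpers:

#include <cstdint>
#include <cstdio>

// Same ns -> ticks conversion as GPUClock::GetTicks, widened so that
// ns * clock_hz cannot wrap around before the divide by 1e9.
static std::uint64_t NsToTicks(std::uint64_t ns, std::uint64_t clock_hz) {
    const unsigned __int128 product = static_cast<unsigned __int128>(ns) * clock_hz;
    return static_cast<std::uint64_t>(product / 1'000'000'000U);
}

int main() {
    // One minute of host time at the 768 MHz profile: the plain 64-bit product
    // (6.0e10 * 7.68e8 ~= 4.6e19) already exceeds UINT64_MAX (~1.8e19), while
    // the widened computation yields the expected 46'080'000'000 ticks.
    const std::uint64_t ns = 60ULL * 1'000'000'000ULL;
    std::printf("%llu\n", static_cast<unsigned long long>(NsToTicks(ns, 768'000'000ULL)));
    return 0;
}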

video_core/gpu_clock.h

@@ -0,0 +1,69 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
/**
 * The GPU runs at different clock speeds and paces than the CPU. It requires
 * its own timer to keep it in check. Sadly, a cycle timer is unfeasible for the
 * GPU, since it's pretty hard to accurately estimate how long actions like Draw,
 * DispatchCompute and Clear take (their durations vary wildly).
 * Using the CPU's clock also has its disadvantages:
 * - If the CPU is idle while the GPU is running, the timer advances
 *   disproportionately due to how the CPU's idle timing works.
 * - If the GPU is synced, the timer won't advance until control is given back to
 *   the CPU.
 * For all these reasons, it has been decided to use a host timer for the GPU.
 **/
#pragma once
#include <chrono>
#include "common/common_types.h"
namespace Tegra {
enum GPUClockProfiles : u64 {
Profile1 = 384000000,
Profile2 = 768000000,
Profile3 = 691200000,
Profile4 = 230400000,
Profile5 = 307000000,
Profile6 = 460800000,
};
class GPUClock {
public:
GPUClock();
~GPUClock();
u64 GetTicks() const;
std::chrono::nanoseconds GetNsTime() const {
const Clock::time_point now = Clock::now();
return std::chrono::duration_cast<std::chrono::nanoseconds>(now - start_walltime);
}
std::chrono::microseconds GetUsTime() const {
const Clock::time_point now = Clock::now();
return std::chrono::duration_cast<std::chrono::microseconds>(now - start_walltime);
}
std::chrono::milliseconds GetMsTime() const {
const Clock::time_point now = Clock::now();
return std::chrono::duration_cast<std::chrono::milliseconds>(now - start_walltime);
}
void SetGPUClock(const u64 new_clock) {
gpu_clock = new_clock;
}
private:
using Clock = std::chrono::system_clock;
Clock::time_point start_walltime = Clock::now();
u64 gpu_clock{GPUClockProfiles::Profile1};
};
} // namespace Tegra
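
For orientation, a hypothetical call site built only from the pieces this change set adds (the Clock() accessor on GPU, SetGPUClock, the profile enum, and the time getters); this exact snippet does not appear in the diff:

// Hypothetical usage sketch, assuming the yuzu tree with this change applied.
auto& clock = Core::System::GetInstance().GPU().Clock();
clock.SetGPUClock(Tegra::GPUClockProfiles::Profile2); // e.g. switch to the 768 MHz profile
const auto ns = clock.GetNsTime();  // host wall-clock time since the clock was constructed
const u64 ticks = clock.GetTicks(); // the same interval expressed in GPU cycles at the current profile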