Compare commits
10 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
84ad81ee67 | ||
|
|
63aa08acbe | ||
|
|
3f1b4fb23a | ||
|
|
aaa373585c | ||
|
|
7b574f406b | ||
|
|
65651078e5 | ||
|
|
ac51d048a9 | ||
|
|
4483089d70 | ||
|
|
d6015ee211 | ||
|
|
81e086b5ac |
@@ -36,7 +36,8 @@
|
||||
#include "frontend/applets/software_keyboard.h"
|
||||
#include "frontend/applets/web_browser.h"
|
||||
#include "video_core/debug_utils/debug_utils.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/gpu_asynch.h"
|
||||
#include "video_core/gpu_synch.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
@@ -129,10 +130,16 @@ struct System::Impl {
|
||||
return ResultStatus::ErrorVideoCore;
|
||||
}
|
||||
|
||||
gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer());
|
||||
is_powered_on = true;
|
||||
|
||||
if (Settings::values.use_asynchronous_gpu_emulation) {
|
||||
gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
|
||||
} else {
|
||||
gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
|
||||
}
|
||||
|
||||
cpu_core_manager.Initialize(system);
|
||||
is_powered_on = true;
|
||||
|
||||
LOG_DEBUG(Core, "Initialized OK");
|
||||
|
||||
// Reset counters and set time origin to current frame
|
||||
|
||||
@@ -9,32 +9,43 @@
|
||||
|
||||
#include <opus.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/hle/ipc_helpers.h"
|
||||
#include "core/hle/kernel/hle_ipc.h"
|
||||
#include "core/hle/service/audio/hwopus.h"
|
||||
|
||||
namespace Service::Audio {
|
||||
namespace {
|
||||
|
||||
struct OpusDeleter {
|
||||
void operator()(void* ptr) const {
|
||||
operator delete(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
using OpusDecoderPtr = std::unique_ptr<OpusDecoder, OpusDeleter>;
|
||||
|
||||
struct OpusPacketHeader {
|
||||
// Packet size in bytes.
|
||||
u32_be size;
|
||||
// Indicates the final range of the codec's entropy coder.
|
||||
u32_be final_range;
|
||||
};
|
||||
static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusHeader is an invalid size");
|
||||
|
||||
class OpusDecoderStateBase {
|
||||
class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
|
||||
public:
|
||||
IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoder, OpusDeleter> decoder, u32 sample_rate,
|
||||
u32 channel_count)
|
||||
: ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)),
|
||||
sample_rate(sample_rate), channel_count(channel_count) {
|
||||
// clang-format off
|
||||
static const FunctionInfo functions[] = {
|
||||
{0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
|
||||
{1, nullptr, "SetContext"},
|
||||
{2, nullptr, "DecodeInterleavedForMultiStreamOld"},
|
||||
{3, nullptr, "SetContextForMultiStream"},
|
||||
{4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
|
||||
{5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
|
||||
{6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
|
||||
{7, nullptr, "DecodeInterleavedForMultiStream"},
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
RegisterHandlers(functions);
|
||||
}
|
||||
|
||||
private:
|
||||
/// Describes extra behavior that may be asked of the decoding context.
|
||||
enum class ExtraBehavior {
|
||||
/// No extra behavior.
|
||||
@@ -44,36 +55,30 @@ public:
|
||||
ResetContext,
|
||||
};
|
||||
|
||||
enum class PerfTime {
|
||||
Disabled,
|
||||
Enabled,
|
||||
};
|
||||
void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called");
|
||||
|
||||
virtual ~OpusDecoderStateBase() = default;
|
||||
|
||||
// Decodes interleaved Opus packets. Optionally allows reporting time taken to
|
||||
// perform the decoding, as well as any relevant extra behavior.
|
||||
virtual void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
|
||||
ExtraBehavior extra_behavior) = 0;
|
||||
};
|
||||
|
||||
// Represents the decoder state for a non-multistream decoder.
|
||||
class OpusDecoderState final : public OpusDecoderStateBase {
|
||||
public:
|
||||
explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count)
|
||||
: decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {}
|
||||
|
||||
void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
|
||||
ExtraBehavior extra_behavior) override {
|
||||
if (perf_time == PerfTime::Disabled) {
|
||||
DecodeInterleavedHelper(ctx, nullptr, extra_behavior);
|
||||
} else {
|
||||
u64 performance = 0;
|
||||
DecodeInterleavedHelper(ctx, &performance, extra_behavior);
|
||||
}
|
||||
DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None);
|
||||
}
|
||||
|
||||
void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called");
|
||||
|
||||
u64 performance = 0;
|
||||
DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None);
|
||||
}
|
||||
|
||||
void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called");
|
||||
|
||||
IPC::RequestParser rp{ctx};
|
||||
const auto extra_behavior =
|
||||
rp.Pop<bool>() ? ExtraBehavior::ResetContext : ExtraBehavior::None;
|
||||
|
||||
u64 performance = 0;
|
||||
DecodeInterleavedHelper(ctx, &performance, extra_behavior);
|
||||
}
|
||||
|
||||
private:
|
||||
void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance,
|
||||
ExtraBehavior extra_behavior) {
|
||||
u32 consumed = 0;
|
||||
@@ -84,7 +89,8 @@ private:
|
||||
ResetDecoderContext();
|
||||
}
|
||||
|
||||
if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
|
||||
if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples,
|
||||
performance)) {
|
||||
LOG_ERROR(Audio, "Failed to decode opus data");
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
// TODO(ogniK): Use correct error code
|
||||
@@ -103,27 +109,27 @@ private:
|
||||
ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
|
||||
}
|
||||
|
||||
bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input,
|
||||
std::vector<opus_int16>& output, u64* out_performance_time) const {
|
||||
bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input,
|
||||
std::vector<opus_int16>& output, u64* out_performance_time) {
|
||||
const auto start_time = std::chrono::high_resolution_clock::now();
|
||||
const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
|
||||
if (sizeof(OpusPacketHeader) > input.size()) {
|
||||
if (sizeof(OpusHeader) > input.size()) {
|
||||
LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
|
||||
sizeof(OpusPacketHeader), input.size());
|
||||
sizeof(OpusHeader), input.size());
|
||||
return false;
|
||||
}
|
||||
|
||||
OpusPacketHeader hdr{};
|
||||
std::memcpy(&hdr, input.data(), sizeof(OpusPacketHeader));
|
||||
if (sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size) > input.size()) {
|
||||
OpusHeader hdr{};
|
||||
std::memcpy(&hdr, input.data(), sizeof(OpusHeader));
|
||||
if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) {
|
||||
LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}",
|
||||
sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size), input.size());
|
||||
sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size());
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto frame = input.data() + sizeof(OpusPacketHeader);
|
||||
const auto frame = input.data() + sizeof(OpusHeader);
|
||||
const auto decoded_sample_count = opus_packet_get_nb_samples(
|
||||
frame, static_cast<opus_int32>(input.size() - sizeof(OpusPacketHeader)),
|
||||
frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)),
|
||||
static_cast<opus_int32>(sample_rate));
|
||||
if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) {
|
||||
LOG_ERROR(
|
||||
@@ -135,18 +141,18 @@ private:
|
||||
|
||||
const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
|
||||
const auto out_sample_count =
|
||||
opus_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
|
||||
opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0);
|
||||
if (out_sample_count < 0) {
|
||||
LOG_ERROR(Audio,
|
||||
"Incorrect sample count received from opus_decode, "
|
||||
"output_sample_count={}, frame_size={}, data_sz_from_hdr={}",
|
||||
out_sample_count, frame_size, static_cast<u32>(hdr.size));
|
||||
out_sample_count, frame_size, static_cast<u32>(hdr.sz));
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
|
||||
sample_count = out_sample_count;
|
||||
consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size);
|
||||
consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz);
|
||||
if (out_performance_time != nullptr) {
|
||||
*out_performance_time =
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
|
||||
@@ -161,66 +167,21 @@ private:
|
||||
opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
|
||||
}
|
||||
|
||||
OpusDecoderPtr decoder;
|
||||
struct OpusHeader {
|
||||
u32_be sz; // Needs to be BE for some odd reason
|
||||
INSERT_PADDING_WORDS(1);
|
||||
};
|
||||
static_assert(sizeof(OpusHeader) == 0x8, "OpusHeader is an invalid size");
|
||||
|
||||
std::unique_ptr<OpusDecoder, OpusDeleter> decoder;
|
||||
u32 sample_rate;
|
||||
u32 channel_count;
|
||||
};
|
||||
|
||||
class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
|
||||
public:
|
||||
explicit IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoderStateBase> decoder_state)
|
||||
: ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} {
|
||||
// clang-format off
|
||||
static const FunctionInfo functions[] = {
|
||||
{0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
|
||||
{1, nullptr, "SetContext"},
|
||||
{2, nullptr, "DecodeInterleavedForMultiStreamOld"},
|
||||
{3, nullptr, "SetContextForMultiStream"},
|
||||
{4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
|
||||
{5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
|
||||
{6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
|
||||
{7, nullptr, "DecodeInterleavedForMultiStream"},
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
RegisterHandlers(functions);
|
||||
}
|
||||
|
||||
private:
|
||||
void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called");
|
||||
|
||||
decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Disabled,
|
||||
OpusDecoderStateBase::ExtraBehavior::None);
|
||||
}
|
||||
|
||||
void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called");
|
||||
|
||||
decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
|
||||
OpusDecoderStateBase::ExtraBehavior::None);
|
||||
}
|
||||
|
||||
void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called");
|
||||
|
||||
IPC::RequestParser rp{ctx};
|
||||
const auto extra_behavior = rp.Pop<bool>()
|
||||
? OpusDecoderStateBase::ExtraBehavior::ResetContext
|
||||
: OpusDecoderStateBase::ExtraBehavior::None;
|
||||
|
||||
decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
|
||||
extra_behavior);
|
||||
}
|
||||
|
||||
std::unique_ptr<OpusDecoderStateBase> decoder_state;
|
||||
};
|
||||
|
||||
std::size_t WorkerBufferSize(u32 channel_count) {
|
||||
static std::size_t WorkerBufferSize(u32 channel_count) {
|
||||
ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
|
||||
return opus_decoder_get_size(static_cast<int>(channel_count));
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp{ctx};
|
||||
@@ -259,7 +220,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
|
||||
const std::size_t worker_sz = WorkerBufferSize(channel_count);
|
||||
ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
|
||||
|
||||
OpusDecoderPtr decoder{static_cast<OpusDecoder*>(operator new(worker_sz))};
|
||||
std::unique_ptr<OpusDecoder, OpusDeleter> decoder{
|
||||
static_cast<OpusDecoder*>(operator new(worker_sz))};
|
||||
if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) {
|
||||
LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err);
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
@@ -270,8 +232,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.PushIpcInterface<IHardwareOpusDecoderManager>(
|
||||
std::make_unique<OpusDecoderState>(std::move(decoder), sample_rate, channel_count));
|
||||
rb.PushIpcInterface<IHardwareOpusDecoderManager>(std::move(decoder), sample_rate,
|
||||
channel_count);
|
||||
}
|
||||
|
||||
HwOpus::HwOpus() : ServiceFramework("hwopus") {
|
||||
|
||||
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
|
||||
|
||||
auto& instance = Core::System::GetInstance();
|
||||
instance.GetPerfStats().EndGameFrame();
|
||||
instance.Renderer().SwapBuffers(framebuffer);
|
||||
instance.GPU().SwapBuffers(framebuffer);
|
||||
}
|
||||
|
||||
} // namespace Service::Nvidia::Devices
|
||||
|
||||
@@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
|
||||
auto& gpu = system_instance.GPU();
|
||||
auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
|
||||
ASSERT(cpu_addr);
|
||||
system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
|
||||
gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
|
||||
|
||||
params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
|
||||
|
||||
|
||||
@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void PushGPUEntries(Tegra::CommandList&& entries) {
|
||||
if (entries.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
|
||||
dma_pusher.Push(std::move(entries));
|
||||
dma_pusher.DispatchCalls();
|
||||
}
|
||||
|
||||
u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
|
||||
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
|
||||
UNIMPLEMENTED();
|
||||
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
|
||||
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
|
||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||
|
||||
PushGPUEntries(std::move(entries));
|
||||
Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
|
||||
|
||||
params.fence_out.id = 0;
|
||||
params.fence_out.value = 0;
|
||||
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
|
||||
Memory::ReadBlock(params.address, entries.data(),
|
||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||
|
||||
PushGPUEntries(std::move(entries));
|
||||
Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
|
||||
|
||||
params.fence_out.id = 0;
|
||||
params.fence_out.value = 0;
|
||||
|
||||
@@ -186,7 +186,7 @@ void NVFlinger::Compose() {
|
||||
|
||||
// There was no queued buffer to draw, render previous frame
|
||||
system_instance.GetPerfStats().EndGameFrame();
|
||||
system_instance.Renderer().SwapBuffers({});
|
||||
system_instance.GPU().SwapBuffers({});
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -356,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
|
||||
const VAddr overlap_end = std::min(end, region_end);
|
||||
const VAddr overlap_size = overlap_end - overlap_start;
|
||||
|
||||
auto& rasterizer = system_instance.Renderer().Rasterizer();
|
||||
auto& gpu = system_instance.GPU();
|
||||
switch (mode) {
|
||||
case FlushMode::Flush:
|
||||
rasterizer.FlushRegion(overlap_start, overlap_size);
|
||||
gpu.FlushRegion(overlap_start, overlap_size);
|
||||
break;
|
||||
case FlushMode::Invalidate:
|
||||
rasterizer.InvalidateRegion(overlap_start, overlap_size);
|
||||
gpu.InvalidateRegion(overlap_start, overlap_size);
|
||||
break;
|
||||
case FlushMode::FlushAndInvalidate:
|
||||
rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size);
|
||||
gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -393,6 +393,7 @@ struct Values {
|
||||
u16 frame_limit;
|
||||
bool use_disk_shader_cache;
|
||||
bool use_accurate_gpu_emulation;
|
||||
bool use_asynchronous_gpu_emulation;
|
||||
|
||||
float bg_red;
|
||||
float bg_green;
|
||||
|
||||
@@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() {
|
||||
Settings::values.use_disk_shader_cache);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
|
||||
Settings::values.use_accurate_gpu_emulation);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
|
||||
Settings::values.use_asynchronous_gpu_emulation);
|
||||
AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
|
||||
Settings::values.use_docked_mode);
|
||||
}
|
||||
|
||||
@@ -17,6 +17,12 @@ add_library(video_core STATIC
|
||||
engines/shader_header.h
|
||||
gpu.cpp
|
||||
gpu.h
|
||||
gpu_asynch.cpp
|
||||
gpu_asynch.h
|
||||
gpu_synch.cpp
|
||||
gpu_synch.h
|
||||
gpu_thread.cpp
|
||||
gpu_thread.h
|
||||
macro_interpreter.cpp
|
||||
macro_interpreter.h
|
||||
memory_manager.cpp
|
||||
|
||||
@@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) {
|
||||
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
|
||||
// We do this before actually writing the new data because the destination address might contain
|
||||
// a dirty surface that will have to be written back to memory.
|
||||
rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
|
||||
Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
|
||||
|
||||
Memory::Write32(*dest_address, data);
|
||||
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||
|
||||
@@ -92,12 +92,12 @@ void MaxwellDMA::HandleCopy() {
|
||||
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
|
||||
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
|
||||
// copying.
|
||||
rasterizer.FlushRegion(*source_cpu, src_size);
|
||||
Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
|
||||
|
||||
// We have to invalidate the destination region to evict any outdated surfaces from the
|
||||
// cache. We do this before actually writing the new data because the destination address
|
||||
// might contain a dirty surface that will have to be written back to memory.
|
||||
rasterizer.InvalidateRegion(*dest_cpu, dst_size);
|
||||
Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
|
||||
};
|
||||
|
||||
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
@@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) {
|
||||
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
|
||||
auto& rasterizer{renderer.Rasterizer()};
|
||||
memory_manager = std::make_unique<Tegra::MemoryManager>();
|
||||
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
|
||||
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
|
||||
|
||||
@@ -16,8 +16,8 @@ class System;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
class RendererBase;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
@@ -119,9 +119,10 @@ enum class EngineID {
|
||||
MAXWELL_DMA_COPY_A = 0xB0B5,
|
||||
};
|
||||
|
||||
class GPU final {
|
||||
class GPU {
|
||||
public:
|
||||
explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
|
||||
explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
|
||||
|
||||
~GPU();
|
||||
|
||||
struct MethodCall {
|
||||
@@ -200,8 +201,42 @@ public:
|
||||
};
|
||||
} regs{};
|
||||
|
||||
/// Push GPU command entries to be processed
|
||||
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
|
||||
|
||||
/// Swap buffers (render frame)
|
||||
virtual void SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||
virtual void FlushRegion(VAddr addr, u64 size) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
||||
|
||||
private:
|
||||
void ProcessBindMethod(const MethodCall& method_call);
|
||||
void ProcessSemaphoreTriggerMethod();
|
||||
void ProcessSemaphoreRelease();
|
||||
void ProcessSemaphoreAcquire();
|
||||
|
||||
/// Calls a GPU puller method.
|
||||
void CallPullerMethod(const MethodCall& method_call);
|
||||
|
||||
/// Calls a GPU engine method.
|
||||
void CallEngineMethod(const MethodCall& method_call);
|
||||
|
||||
/// Determines where the method should be executed.
|
||||
bool ExecuteMethodOnEngine(const MethodCall& method_call);
|
||||
|
||||
protected:
|
||||
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
|
||||
VideoCore::RendererBase& renderer;
|
||||
|
||||
private:
|
||||
std::unique_ptr<Tegra::MemoryManager> memory_manager;
|
||||
|
||||
/// Mapping of command subchannels to their bound engine ids.
|
||||
@@ -217,18 +252,6 @@ private:
|
||||
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
|
||||
/// Inline memory engine
|
||||
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
|
||||
|
||||
void ProcessBindMethod(const MethodCall& method_call);
|
||||
void ProcessSemaphoreTriggerMethod();
|
||||
void ProcessSemaphoreRelease();
|
||||
void ProcessSemaphoreAcquire();
|
||||
|
||||
// Calls a GPU puller method.
|
||||
void CallPullerMethod(const MethodCall& method_call);
|
||||
// Calls a GPU engine method.
|
||||
void CallEngineMethod(const MethodCall& method_call);
|
||||
// Determines where the method should be executed.
|
||||
bool ExecuteMethodOnEngine(const MethodCall& method_call);
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
|
||||
37
src/video_core/gpu_asynch.cpp
Normal file
37
src/video_core/gpu_asynch.cpp
Normal file
@@ -0,0 +1,37 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/gpu_asynch.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
|
||||
: Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
|
||||
|
||||
GPUAsynch::~GPUAsynch() = default;
|
||||
|
||||
void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
|
||||
gpu_thread.SubmitList(std::move(entries));
|
||||
}
|
||||
|
||||
void GPUAsynch::SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
|
||||
gpu_thread.SwapBuffers(std::move(framebuffer));
|
||||
}
|
||||
|
||||
void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
|
||||
gpu_thread.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
|
||||
gpu_thread.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
gpu_thread.FlushAndInvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
37
src/video_core/gpu_asynch.h
Normal file
37
src/video_core/gpu_asynch.h
Normal file
@@ -0,0 +1,37 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class RendererBase;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
namespace GPUThread {
|
||||
class ThreadManager;
|
||||
} // namespace GPUThread
|
||||
|
||||
/// Implementation of GPU interface that runs the GPU asynchronously
|
||||
class GPUAsynch : public Tegra::GPU {
|
||||
public:
|
||||
explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
|
||||
~GPUAsynch();
|
||||
|
||||
void PushGPUEntries(Tegra::CommandList&& entries) override;
|
||||
void SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
|
||||
private:
|
||||
GPUThread::ThreadManager gpu_thread;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
37
src/video_core/gpu_synch.cpp
Normal file
37
src/video_core/gpu_synch.cpp
Normal file
@@ -0,0 +1,37 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/gpu_synch.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
|
||||
: Tegra::GPU(system, renderer) {}
|
||||
|
||||
GPUSynch::~GPUSynch() = default;
|
||||
|
||||
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
|
||||
dma_pusher->Push(std::move(entries));
|
||||
dma_pusher->DispatchCalls();
|
||||
}
|
||||
|
||||
void GPUSynch::SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
|
||||
renderer.SwapBuffers(std::move(framebuffer));
|
||||
}
|
||||
|
||||
void GPUSynch::FlushRegion(VAddr addr, u64 size) {
|
||||
renderer.Rasterizer().FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
|
||||
renderer.Rasterizer().InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
29
src/video_core/gpu_synch.h
Normal file
29
src/video_core/gpu_synch.h
Normal file
@@ -0,0 +1,29 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class RendererBase;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
/// Implementation of GPU interface that runs the GPU synchronously
|
||||
class GPUSynch : public Tegra::GPU {
|
||||
public:
|
||||
explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
|
||||
~GPUSynch();
|
||||
|
||||
void PushGPUEntries(Tegra::CommandList&& entries) override;
|
||||
void SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
152
src/video_core/gpu_thread.cpp
Normal file
152
src/video_core/gpu_thread.cpp
Normal file
@@ -0,0 +1,152 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "core/frontend/scope_acquire_window_context.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/dma_pusher.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace VideoCommon::GPUThread {
|
||||
|
||||
/// Executes a single GPU thread command
|
||||
static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
|
||||
Tegra::DmaPusher& dma_pusher) {
|
||||
if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
|
||||
dma_pusher.Push(std::move(submit_list->entries));
|
||||
dma_pusher.DispatchCalls();
|
||||
} else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
|
||||
renderer.SwapBuffers(data->framebuffer);
|
||||
} else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
|
||||
renderer.Rasterizer().FlushRegion(data->addr, data->size);
|
||||
} else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
|
||||
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
|
||||
} else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
|
||||
renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs the GPU thread
|
||||
static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
|
||||
SynchState& state) {
|
||||
|
||||
MicroProfileOnThreadCreate("GpuThread");
|
||||
|
||||
auto WaitForWakeup = [&]() {
|
||||
std::unique_lock<std::mutex> lock{state.signal_mutex};
|
||||
state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
|
||||
};
|
||||
|
||||
// Wait for first GPU command before acquiring the window context
|
||||
WaitForWakeup();
|
||||
|
||||
// If emulation was stopped during disk shader loading, abort before trying to acquire context
|
||||
if (!state.is_running) {
|
||||
return;
|
||||
}
|
||||
|
||||
Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
|
||||
|
||||
while (state.is_running) {
|
||||
if (!state.is_running) {
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
// Thread has been woken up, so make the previous write queue the next read queue
|
||||
std::lock_guard<std::mutex> lock{state.signal_mutex};
|
||||
std::swap(state.push_queue, state.pop_queue);
|
||||
}
|
||||
|
||||
// Execute all of the GPU commands
|
||||
while (!state.pop_queue->empty()) {
|
||||
ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
|
||||
state.pop_queue->pop();
|
||||
}
|
||||
|
||||
state.UpdateIdleState();
|
||||
|
||||
// Signal that the GPU thread has finished processing commands
|
||||
if (state.is_idle) {
|
||||
state.idle_condition.notify_one();
|
||||
}
|
||||
|
||||
// Wait for CPU thread to send more GPU commands
|
||||
WaitForWakeup();
|
||||
}
|
||||
}
|
||||
|
||||
ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
|
||||
: renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
|
||||
std::ref(dma_pusher), std::ref(state)},
|
||||
thread_id{thread.get_id()} {}
|
||||
|
||||
ThreadManager::~ThreadManager() {
|
||||
{
|
||||
// Notify GPU thread that a shutdown is pending
|
||||
std::lock_guard<std::mutex> lock{state.signal_mutex};
|
||||
state.is_running = false;
|
||||
}
|
||||
|
||||
state.signal_condition.notify_one();
|
||||
thread.join();
|
||||
}
|
||||
|
||||
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
|
||||
if (entries.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
PushCommand(SubmitListCommand(std::move(entries)), false, false);
|
||||
}
|
||||
|
||||
void ThreadManager::SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
|
||||
PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
|
||||
}
|
||||
|
||||
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
|
||||
// Block the CPU when using accurate emulation
|
||||
PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
|
||||
}
|
||||
|
||||
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
||||
PushCommand(InvalidateRegionCommand(addr, size), true, true);
|
||||
}
|
||||
|
||||
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{state.signal_mutex};
|
||||
|
||||
if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
|
||||
// Execute the command synchronously on the current thread
|
||||
ExecuteCommand(&command_data, renderer, dma_pusher);
|
||||
return;
|
||||
}
|
||||
|
||||
// Push the command to the GPU thread
|
||||
state.UpdateIdleState();
|
||||
state.push_queue->emplace(command_data);
|
||||
}
|
||||
|
||||
// Signal the GPU thread that commands are pending
|
||||
state.signal_condition.notify_one();
|
||||
|
||||
if (wait_for_idle) {
|
||||
// Wait for the GPU to be idle (all commands to be executed)
|
||||
std::unique_lock<std::mutex> lock{state.idle_mutex};
|
||||
state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::GPUThread
|
||||
136
src/video_core/gpu_thread.h
Normal file
136
src/video_core/gpu_thread.h
Normal file
@@ -0,0 +1,136 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <thread>
|
||||
#include <variant>
|
||||
|
||||
namespace Tegra {
|
||||
struct FramebufferConfig;
|
||||
class DmaPusher;
|
||||
} // namespace Tegra
|
||||
|
||||
namespace VideoCore {
|
||||
class RendererBase;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace VideoCommon::GPUThread {
|
||||
|
||||
/// Command to signal to the GPU thread that a command list is ready for processing
|
||||
struct SubmitListCommand final {
|
||||
explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
|
||||
|
||||
Tegra::CommandList entries;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread that a swap buffers is pending
|
||||
struct SwapBuffersCommand final {
|
||||
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
|
||||
: framebuffer{std::move(framebuffer)} {}
|
||||
|
||||
std::optional<const Tegra::FramebufferConfig> framebuffer;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread to flush a region
|
||||
struct FlushRegionCommand final {
|
||||
explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||
|
||||
const VAddr addr;
|
||||
const u64 size;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread to invalidate a region
|
||||
struct InvalidateRegionCommand final {
|
||||
explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||
|
||||
const VAddr addr;
|
||||
const u64 size;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread to flush and invalidate a region
|
||||
struct FlushAndInvalidateRegionCommand final {
|
||||
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
|
||||
: addr{addr}, size{size} {}
|
||||
|
||||
const VAddr addr;
|
||||
const u64 size;
|
||||
};
|
||||
|
||||
using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
|
||||
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
|
||||
|
||||
/// Struct used to synchronize the GPU thread
|
||||
struct SynchState final {
|
||||
std::atomic<bool> is_running{true};
|
||||
std::atomic<bool> is_idle{true};
|
||||
std::condition_variable signal_condition;
|
||||
std::mutex signal_mutex;
|
||||
std::condition_variable idle_condition;
|
||||
std::mutex idle_mutex;
|
||||
|
||||
// We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
|
||||
// one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
|
||||
// empty. This allows for efficient thread-safe access, as it does not require any copies.
|
||||
|
||||
using CommandQueue = std::queue<CommandData>;
|
||||
std::array<CommandQueue, 2> command_queues;
|
||||
CommandQueue* push_queue{&command_queues[0]};
|
||||
CommandQueue* pop_queue{&command_queues[1]};
|
||||
|
||||
void UpdateIdleState() {
|
||||
std::lock_guard<std::mutex> lock{idle_mutex};
|
||||
is_idle = command_queues[0].empty() && command_queues[1].empty();
|
||||
}
|
||||
};
|
||||
|
||||
/// Class used to manage the GPU thread
|
||||
class ThreadManager final {
|
||||
public:
|
||||
explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
|
||||
~ThreadManager();
|
||||
|
||||
/// Push GPU command entries to be processed
|
||||
void SubmitList(Tegra::CommandList&& entries);
|
||||
|
||||
/// Swap buffers (render frame)
|
||||
void SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||
void FlushRegion(VAddr addr, u64 size);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||
void InvalidateRegion(VAddr addr, u64 size);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size);
|
||||
|
||||
/// Waits the caller until the GPU thread is idle, used for synchronization
|
||||
void WaitForIdle();
|
||||
|
||||
private:
|
||||
/// Pushes a command to be executed by the GPU thread
|
||||
void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
|
||||
|
||||
/// Returns true if this is called by the GPU thread
|
||||
bool IsGpuThread() const {
|
||||
return std::this_thread::get_id() == thread_id;
|
||||
}
|
||||
|
||||
private:
|
||||
SynchState state;
|
||||
std::thread thread;
|
||||
std::thread::id thread_id;
|
||||
VideoCore::RendererBase& renderer;
|
||||
Tegra::DmaPusher& dma_pusher;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon::GPUThread
|
||||
@@ -749,11 +749,7 @@ void RasterizerOpenGL::FlushAll() {}
|
||||
|
||||
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
|
||||
if (Settings::values.use_accurate_gpu_emulation) {
|
||||
// Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
|
||||
res_cache.FlushRegion(addr, size);
|
||||
}
|
||||
res_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
||||
|
||||
@@ -20,10 +20,7 @@
|
||||
EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {}
|
||||
|
||||
void EmuThread::run() {
|
||||
if (!Settings::values.use_multi_core) {
|
||||
// Single core mode must acquire OpenGL context for entire emulation session
|
||||
render_window->MakeCurrent();
|
||||
}
|
||||
render_window->MakeCurrent();
|
||||
|
||||
MicroProfileOnThreadCreate("EmuThread");
|
||||
|
||||
@@ -38,6 +35,11 @@ void EmuThread::run() {
|
||||
|
||||
emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
|
||||
|
||||
if (Settings::values.use_asynchronous_gpu_emulation) {
|
||||
// Release OpenGL context for the GPU thread
|
||||
render_window->DoneCurrent();
|
||||
}
|
||||
|
||||
// holds whether the cpu was running during the last iteration,
|
||||
// so that the DebugModeLeft signal can be emitted before the
|
||||
// next execution step
|
||||
|
||||
@@ -374,6 +374,8 @@ void Config::ReadValues() {
|
||||
qt_config->value("use_disk_shader_cache", false).toBool();
|
||||
Settings::values.use_accurate_gpu_emulation =
|
||||
qt_config->value("use_accurate_gpu_emulation", false).toBool();
|
||||
Settings::values.use_asynchronous_gpu_emulation =
|
||||
qt_config->value("use_asynchronous_gpu_emulation", false).toBool();
|
||||
|
||||
Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();
|
||||
Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat();
|
||||
@@ -633,6 +635,8 @@ void Config::SaveValues() {
|
||||
qt_config->setValue("frame_limit", Settings::values.frame_limit);
|
||||
qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache);
|
||||
qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation);
|
||||
qt_config->setValue("use_asynchronous_gpu_emulation",
|
||||
Settings::values.use_asynchronous_gpu_emulation);
|
||||
|
||||
// Cast to double because Qt's written float values are not human-readable
|
||||
qt_config->setValue("bg_red", (double)Settings::values.bg_red);
|
||||
|
||||
@@ -75,6 +75,8 @@ void ConfigureGraphics::setConfiguration() {
|
||||
ui->frame_limit->setValue(Settings::values.frame_limit);
|
||||
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
|
||||
ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
|
||||
ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
|
||||
ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
|
||||
UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
|
||||
Settings::values.bg_blue));
|
||||
}
|
||||
@@ -86,6 +88,8 @@ void ConfigureGraphics::applyConfiguration() {
|
||||
Settings::values.frame_limit = ui->frame_limit->value();
|
||||
Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
|
||||
Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
|
||||
Settings::values.use_asynchronous_gpu_emulation =
|
||||
ui->use_asynchronous_gpu_emulation->isChecked();
|
||||
Settings::values.bg_red = static_cast<float>(bg_color.redF());
|
||||
Settings::values.bg_green = static_cast<float>(bg_color.greenF());
|
||||
Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
|
||||
|
||||
@@ -63,6 +63,13 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="use_asynchronous_gpu_emulation">
|
||||
<property name="text">
|
||||
<string>Use asynchronous GPU emulation</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout">
|
||||
<item>
|
||||
|
||||
@@ -354,6 +354,8 @@ void Config::ReadValues() {
|
||||
sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
|
||||
Settings::values.use_accurate_gpu_emulation =
|
||||
sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
|
||||
Settings::values.use_asynchronous_gpu_emulation =
|
||||
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
|
||||
|
||||
Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0);
|
||||
Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0);
|
||||
|
||||
@@ -118,6 +118,10 @@ use_disk_shader_cache =
|
||||
# 0 (default): Off (fast), 1 : On (slow)
|
||||
use_accurate_gpu_emulation =
|
||||
|
||||
# Whether to use asynchronous GPU emulation
|
||||
# 0 : Off (slow), 1 (default): On (fast)
|
||||
use_asynchronous_gpu_emulation =
|
||||
|
||||
# The clear color for the renderer. What shows up on the sides of the bottom screen.
|
||||
# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
|
||||
bg_red =
|
||||
|
||||
Reference in New Issue
Block a user