Compare commits

..

45 Commits

Author SHA1 Message Date
Kelebek1
ca6bf06ef7 Log object names with debug renderer, add a GPU address to ImageViews 2023-05-06 04:48:32 +01:00
bunnei
bb2e407772 Merge pull request #10159 from german77/home_screenshot
core: hid: Fix state of capture and home buttons
2023-05-05 12:02:15 -07:00
german77
8df3aed2f1 core: hid: Fix state of capture and home buttons 2023-05-04 22:36:59 -06:00
liamwhite
16939b1a6e Merge pull request #10128 from Kelebek1/audren_terminate
Wait for the terminate event before destroying a system instance
2023-05-04 14:44:09 -04:00
liamwhite
60d54d911e Merge pull request #10145 from Kelebek1/code_size
Fix shader code resize to use word size rather than byte size
2023-05-04 14:44:02 -04:00
liamwhite
e2b81ae5fe Merge pull request #10156 from v1993/looks-decent-to-me
Remove LGTM config
2023-05-04 14:43:55 -04:00
Valeri
b095a0242d Remove LGTM config
LGTM.com is no longer available since it was superseded by CodeQL.
2023-05-04 15:36:47 +03:00
Fernando S
2506594c50 Merge pull request #10153 from FernandoS27/a-quickie-fixie
Memory manager: Fix possible softlock
2023-05-04 03:56:53 +02:00
Fernando S
7d5683c63c Merge pull request #10154 from liamwhite/optimistic
settings: remove pessimistic flushing
2023-05-04 01:55:51 +02:00
bunnei
055ee84024 Merge pull request #10142 from FernandoS27/missing-astc
GPU: implement missing ASTC
2023-05-03 16:49:27 -07:00
Liam
ae59ffc56c settings: remove pessimistic flushing 2023-05-03 18:52:42 -04:00
Fernando Sahmkow
1c13c74295 Memory manager: Fix possible softlock 2023-05-04 00:15:21 +02:00
bunnei
a661c547d8 Merge pull request #10088 from FernandoS27/100-gelato-flavor-test-builds-later
Y.F.C Implement Asynchronous Fence manager and Rework Query async downloads
2023-05-03 15:10:22 -07:00
bunnei
737e1ca101 Merge pull request #10117 from liamwhite/sync-register
kernel: match calls to Register and Unregister
2023-05-03 09:07:19 -07:00
Fernando Sahmkow
87a9be8dec GPU: implement missing ASTC 2023-05-03 11:33:28 -04:00
liamwhite
ffeb5cdd8d Merge pull request #10151 from GPUCode/no-softlocks-please
Fix softlocks when disabling async present
2023-05-03 10:54:24 -04:00
Morph
3ba95402fd Merge pull request #10146 from liamwhite/catch3
catch2: update to 3.3.1
2023-05-03 10:53:12 -04:00
Morph
8dd3baa562 Merge pull request #10144 from liamwhite/dont-turbo
vulkan: disable turbo when debugging tool is attached
2023-05-03 10:53:03 -04:00
Morph
daf7936095 Merge pull request #10143 from liamwhite/fruit-company-moment
video_core: fix build on Apple Clang
2023-05-03 10:52:56 -04:00
Morph
627022bef9 Merge pull request #10124 from liamwhite/pebkac
settings: rename extended memory layout to unsafe, move from general to system
2023-05-03 10:52:45 -04:00
GPUCode
f3fcc15ad5 vk_present_manager: Fix softlocks when disabling async present 2023-05-03 07:50:10 +03:00
Liam
d1dd54cbfa catch2: update to 3.3.1 2023-05-02 21:27:17 -04:00
bunnei
8f43b05d6b Merge pull request #9973 from GPUCode/async-present
Implement asynchronous presentation
2023-05-02 17:54:57 -07:00
Kelebek1
f902cc2a2b Fix code resize to use word size rather than byte size 2023-05-02 23:52:21 +01:00
liamwhite
451b1bba26 Merge pull request #10133 from lat9nq/clang-shadow-and-fallthrough
CMakeLists: Enable more checks on Clang
2023-05-02 18:18:46 -04:00
liamwhite
494cc992eb Merge pull request #10130 from liamwhite/keys
qt: warn on inoperable keys
2023-05-02 18:17:24 -04:00
liamwhite
f603dc9763 Merge pull request #10123 from Kelebek1/sample_mask
Define SampleMask as an array
2023-05-02 18:17:15 -04:00
Liam
e1c74cea10 video_core: fix build on Apple Clang 2023-05-02 18:05:30 -04:00
lat9nq
f7292c776b CMake: Enable type limits on Clang 2023-05-02 02:07:36 -04:00
lat9nq
2007d0e4a0 CMakeLists: Enable checks on Clang
Enables shadow-uncaptured-locals and implicit-fallthrough for Clang.
implicit-fallthrough is not enabled by default in -Wall or -Wextra, and
shadow-uncaptured-local doesn't seem to be enabled by default by
-Wshadow, even though GCC has both of these by their respective cases.
2023-05-02 01:57:22 -04:00
GPUCode
f403d27941 vk_present_manager: Add toggle for async presentation 2023-05-01 23:13:24 +03:00
GPUCode
1d7abac84b vk_blit_screen: Recreate FSR when frame is recreated
* Depends on the layout dimentions and thus should be recreated as well
2023-05-01 23:13:24 +03:00
GPUCode
50791cb974 renderer_vulkan: Fix crashing when updating descriptors
* During pipeline configure the function would acquire some payload space from the descriptor update queue,
  write the descriptor data on the GPU thread and give the scheduler a pointer to the beginning of said space to update it later.
  TickFrame resets the payload cursor, used to track acquires, back to the beginning of the buffer.
  This wasn't a problem before since WaitWorker was called at the end of the frame but now it is.
  If a frame writes to a cursor before the scheduler catches up, it will crash

* To fix this the payload buffer has been increased to account for the in flight frames that are allowed to exist now.
  TickFrame will switch between the payload spaces instead of resetting
2023-05-01 23:13:24 +03:00
GPUCode
2ad9acf795 renderer_vulkan: Async presentation 2023-05-01 23:13:24 +03:00
Liam
7ec66db22c qt: warn on inoperable keys 2023-04-30 23:47:42 -04:00
Kelebek1
2feb40f14d Wait for the terminate event before destroying a system instance 2023-05-01 00:27:12 +01:00
Liam
2cd9e1ecb6 settings: rename extended memory layout to unsafe, move from general to system 2023-04-30 14:24:22 -04:00
Kelebek1
b566c98258 Define SampleMask as an array 2023-04-30 18:37:37 +01:00
Liam
1b5c87ab6a kernel: match calls to Register and Unregister 2023-04-29 21:52:26 -04:00
Fernando Sahmkow
2f15876524 QueryCache: Fix write invalidation. 2023-04-28 23:53:46 +02:00
Fernando Sahmkow
9a7c172f76 MemoryManager: Fix race conditions. 2023-04-28 23:53:02 +02:00
Fernando Sahmkow
e4dc73f61e Clang format and ddress feedback 2023-04-24 12:38:47 +02:00
Fernando Sahmkow
e29ced29fa QueryCache: rework async downloads. 2023-04-23 22:04:14 +02:00
Fernando Sahmkow
7e76c1642c Accuracy Normal: reduce accuracy further for perf improvements in Project Lime 2023-04-23 22:03:44 +02:00
Fernando Sahmkow
fca72beb2d Fence Manager: implement async fence management in a sepparate thread. 2023-04-23 04:48:50 +02:00
82 changed files with 1299 additions and 442 deletions

View File

@@ -1,13 +0,0 @@
# SPDX-FileCopyrightText: 2020 yuzu Emulator Project
# SPDX-License-Identifier: GPL-2.0-or-later
path_classifiers:
library: "externals"
extraction:
cpp:
prepare:
packages:
- "libsdl2-dev"
- "qtmultimedia5-dev"
- "libtbb-dev"
- "libjack-jackd2-dev"

View File

@@ -113,6 +113,9 @@ else()
$<$<CXX_COMPILER_ID:Clang>:-Wno-braced-scalar-init>
$<$<CXX_COMPILER_ID:Clang>:-Wno-unused-private-field>
$<$<CXX_COMPILER_ID:Clang>:-Werror=shadow-uncaptured-local>
$<$<CXX_COMPILER_ID:Clang>:-Werror=implicit-fallthrough>
$<$<CXX_COMPILER_ID:Clang>:-Werror=type-limits>
$<$<CXX_COMPILER_ID:AppleClang>:-Wno-braced-scalar-init>
$<$<CXX_COMPILER_ID:AppleClang>:-Wno-unused-private-field>
)

View File

@@ -436,10 +436,7 @@ void System::Stop() {
}
if (execution_mode == ExecutionMode::Auto) {
// Should wait for the system to terminate here, but core timing (should have) already
// stopped, so this isn't needed. Find a way to make this definite.
// terminate_event.Wait();
terminate_event.Wait();
}
}

View File

@@ -45,6 +45,7 @@ void LogSettings() {
log_setting("System_LanguageIndex", values.language_index.GetValue());
log_setting("System_RegionIndex", values.region_index.GetValue());
log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue());
log_setting("System_UnsafeMemoryLayout", values.use_unsafe_extended_memory_layout.GetValue());
log_setting("Core_UseMultiCore", values.use_multi_core.GetValue());
log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue());
log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue());
@@ -191,7 +192,7 @@ void RestoreGlobalState(bool is_powered_on) {
// Core
values.use_multi_core.SetGlobal(true);
values.use_extended_memory_layout.SetGlobal(true);
values.use_unsafe_extended_memory_layout.SetGlobal(true);
// CPU
values.cpu_accuracy.SetGlobal(true);
@@ -205,6 +206,7 @@ void RestoreGlobalState(bool is_powered_on) {
// Renderer
values.fsr_sharpening_slider.SetGlobal(true);
values.renderer_backend.SetGlobal(true);
values.async_presentation.SetGlobal(true);
values.renderer_force_max_clock.SetGlobal(true);
values.vulkan_device.SetGlobal(true);
values.fullscreen_mode.SetGlobal(true);
@@ -225,7 +227,6 @@ void RestoreGlobalState(bool is_powered_on) {
values.shader_backend.SetGlobal(true);
values.use_asynchronous_shaders.SetGlobal(true);
values.use_fast_gpu_time.SetGlobal(true);
values.use_pessimistic_flushes.SetGlobal(true);
values.use_vulkan_driver_pipeline_cache.SetGlobal(true);
values.bg_red.SetGlobal(true);
values.bg_green.SetGlobal(true);

View File

@@ -388,7 +388,8 @@ struct Values {
// Core
SwitchableSetting<bool> use_multi_core{true, "use_multi_core"};
SwitchableSetting<bool> use_extended_memory_layout{false, "use_extended_memory_layout"};
SwitchableSetting<bool> use_unsafe_extended_memory_layout{false,
"use_unsafe_extended_memory_layout"};
// Cpu
SwitchableSetting<CPUAccuracy, true> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto,
@@ -422,6 +423,7 @@ struct Values {
// Renderer
SwitchableSetting<RendererBackend, true> renderer_backend{
RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
SwitchableSetting<bool> async_presentation{false, "async_presentation"};
SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"};
Setting<bool> renderer_debug{false, "debug"};
Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
@@ -459,7 +461,6 @@ struct Values {
ShaderBackend::SPIRV, "shader_backend"};
SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"};
SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true,
"use_vulkan_driver_pipeline_cache"};

View File

@@ -137,7 +137,7 @@ struct System::Impl {
device_memory = std::make_unique<Core::DeviceMemory>();
is_multicore = Settings::values.use_multi_core.GetValue();
extended_memory_layout = Settings::values.use_extended_memory_layout.GetValue();
extended_memory_layout = Settings::values.use_unsafe_extended_memory_layout.GetValue();
core_timing.SetMulticore(is_multicore);
core_timing.Initialize([&system]() { system.RegisterHostThread(); });
@@ -169,7 +169,7 @@ struct System::Impl {
void ReinitializeIfNecessary(System& system) {
const bool must_reinitialize =
is_multicore != Settings::values.use_multi_core.GetValue() ||
extended_memory_layout != Settings::values.use_extended_memory_layout.GetValue();
extended_memory_layout != Settings::values.use_unsafe_extended_memory_layout.GetValue();
if (!must_reinitialize) {
return;
@@ -178,7 +178,7 @@ struct System::Impl {
LOG_DEBUG(Kernel, "Re-initializing");
is_multicore = Settings::values.use_multi_core.GetValue();
extended_memory_layout = Settings::values.use_extended_memory_layout.GetValue();
extended_memory_layout = Settings::values.use_unsafe_extended_memory_layout.GetValue();
Initialize(system);
}
@@ -293,6 +293,7 @@ struct System::Impl {
ASSERT(Kernel::KProcess::Initialize(main_process, system, "main",
Kernel::KProcess::ProcessType::Userland, resource_limit)
.IsSuccess());
Kernel::KProcess::Register(system.Kernel(), main_process);
kernel.MakeApplicationProcess(main_process);
const auto [load_result, load_parameters] = app_loader->Load(*main_process, system);
if (load_result != Loader::ResultStatus::Success) {

View File

@@ -551,6 +551,8 @@ void EmulatedController::EnableSystemButtons() {
void EmulatedController::DisableSystemButtons() {
std::scoped_lock lock{mutex};
system_buttons_enabled = false;
controller.home_button_state.raw = 0;
controller.capture_button_state.raw = 0;
}
void EmulatedController::ResetSystemButtons() {
@@ -734,6 +736,8 @@ void EmulatedController::SetButton(const Common::Input::CallbackStatus& callback
if (is_configuring) {
controller.npad_button_state.raw = NpadButton::None;
controller.debug_pad_button_state.raw = 0;
controller.home_button_state.raw = 0;
controller.capture_button_state.raw = 0;
lock.unlock();
TriggerOnChange(ControllerTriggerType::Button, false);
return;

View File

@@ -35,12 +35,13 @@ namespace {
using namespace Common::Literals;
u32 GetMemorySizeForInit() {
return Settings::values.use_extended_memory_layout ? Smc::MemorySize_8GB : Smc::MemorySize_4GB;
return Settings::values.use_unsafe_extended_memory_layout ? Smc::MemorySize_8GB
: Smc::MemorySize_4GB;
}
Smc::MemoryArrangement GetMemoryArrangeForInit() {
return Settings::values.use_extended_memory_layout ? Smc::MemoryArrangement_8GB
: Smc::MemoryArrangement_4GB;
return Settings::values.use_unsafe_extended_memory_layout ? Smc::MemoryArrangement_8GB
: Smc::MemoryArrangement_4GB;
}
} // namespace

View File

@@ -182,8 +182,8 @@ public:
explicit KAutoObjectWithList(KernelCore& kernel) : KAutoObject(kernel) {}
static int Compare(const KAutoObjectWithList& lhs, const KAutoObjectWithList& rhs) {
const u64 lid = lhs.GetId();
const u64 rid = rhs.GetId();
const uintptr_t lid = reinterpret_cast<uintptr_t>(std::addressof(lhs));
const uintptr_t rid = reinterpret_cast<uintptr_t>(std::addressof(rhs));
if (lid < rid) {
return -1;

View File

@@ -95,7 +95,7 @@ struct KernelCore::Impl {
pt_heap_region.GetSize());
}
InitializeHackSharedMemory();
InitializeHackSharedMemory(kernel);
RegisterHostThread(nullptr);
}
@@ -216,10 +216,12 @@ struct KernelCore::Impl {
auto* main_thread{Kernel::KThread::Create(system.Kernel())};
main_thread->SetCurrentCore(core);
ASSERT(Kernel::KThread::InitializeMainThread(system, main_thread, core).IsSuccess());
KThread::Register(system.Kernel(), main_thread);
auto* idle_thread{Kernel::KThread::Create(system.Kernel())};
idle_thread->SetCurrentCore(core);
ASSERT(Kernel::KThread::InitializeIdleThread(system, idle_thread, core).IsSuccess());
KThread::Register(system.Kernel(), idle_thread);
schedulers[i]->Initialize(main_thread, idle_thread, core);
}
@@ -230,6 +232,7 @@ struct KernelCore::Impl {
const Core::Timing::CoreTiming& core_timing) {
system_resource_limit = KResourceLimit::Create(system.Kernel());
system_resource_limit->Initialize(&core_timing);
KResourceLimit::Register(kernel, system_resource_limit);
const auto sizes{memory_layout->GetTotalAndKernelMemorySizes()};
const auto total_size{sizes.first};
@@ -355,6 +358,7 @@ struct KernelCore::Impl {
ASSERT(KThread::InitializeHighPriorityThread(system, shutdown_threads[core_id], {}, {},
core_id)
.IsSuccess());
KThread::Register(system.Kernel(), shutdown_threads[core_id]);
}
}
@@ -729,7 +733,7 @@ struct KernelCore::Impl {
memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize());
}
void InitializeHackSharedMemory() {
void InitializeHackSharedMemory(KernelCore& kernel) {
// Setup memory regions for emulated processes
// TODO(bunnei): These should not be hardcoded regions initialized within the kernel
constexpr std::size_t hid_size{0x40000};
@@ -746,14 +750,23 @@ struct KernelCore::Impl {
hid_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, hid_size);
KSharedMemory::Register(kernel, hid_shared_mem);
font_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, font_size);
KSharedMemory::Register(kernel, font_shared_mem);
irs_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, irs_size);
KSharedMemory::Register(kernel, irs_shared_mem);
time_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, time_size);
KSharedMemory::Register(kernel, time_shared_mem);
hidbus_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, hidbus_size);
KSharedMemory::Register(kernel, hidbus_shared_mem);
}
std::mutex registered_objects_lock;
@@ -1072,12 +1085,15 @@ static std::jthread RunHostThreadFunc(KernelCore& kernel, KProcess* process,
// Commit the thread reservation.
thread_reservation.Commit();
// Register the thread.
KThread::Register(kernel, thread);
return std::jthread(
[&kernel, thread, thread_name{std::move(thread_name)}, func{std::move(func)}] {
// Set the thread name.
Common::SetCurrentThreadName(thread_name.c_str());
// Register the thread.
// Set the thread as current.
kernel.RegisterHostThread(thread);
// Run the callback.
@@ -1099,6 +1115,9 @@ std::jthread KernelCore::RunOnHostCoreProcess(std::string&& process_name,
// Ensure that we don't hold onto any extra references.
SCOPE_EXIT({ process->Close(); });
// Register the new process.
KProcess::Register(*this, process);
// Run the host thread.
return RunHostThreadFunc(*this, process, std::move(process_name), std::move(func));
}
@@ -1124,6 +1143,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function
// Ensure that we don't hold onto any extra references.
SCOPE_EXIT({ process->Close(); });
// Register the new process.
KProcess::Register(*this, process);
// Reserve a new thread from the process resource limit.
KScopedResourceReservation thread_reservation(process, LimitableResource::ThreadCountMax);
ASSERT(thread_reservation.Succeeded());
@@ -1136,6 +1158,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function
// Commit the thread reservation.
thread_reservation.Commit();
// Register the new thread.
KThread::Register(*this, thread);
// Begin running the thread.
ASSERT(R_SUCCEEDED(thread->Run()));
}

View File

@@ -156,6 +156,7 @@ public:
auto* session = Kernel::KSession::Create(kernel);
session->Initialize(nullptr, 0);
Kernel::KSession::Register(kernel, session);
auto next_manager = std::make_shared<Service::SessionRequestManager>(
kernel, manager->GetServerManager());

View File

@@ -25,6 +25,9 @@ ServiceContext::ServiceContext(Core::System& system_, std::string name_)
Kernel::KProcess::ProcessType::KernelInternal,
kernel.GetSystemResourceLimit())
.IsSuccess());
// Register the process.
Kernel::KProcess::Register(kernel, process);
process_created = true;
}

View File

@@ -12,6 +12,9 @@ Mutex::Mutex(Core::System& system) : m_system(system) {
m_event = Kernel::KEvent::Create(system.Kernel());
m_event->Initialize(nullptr);
// Register the event.
Kernel::KEvent::Register(system.Kernel(), m_event);
ASSERT(R_SUCCEEDED(m_event->Signal()));
}

View File

@@ -33,6 +33,9 @@ ServerManager::ServerManager(Core::System& system) : m_system{system}, m_serve_m
// Initialize event.
m_event = Kernel::KEvent::Create(system.Kernel());
m_event->Initialize(nullptr);
// Register event.
Kernel::KEvent::Register(system.Kernel(), m_event);
}
ServerManager::~ServerManager() {
@@ -160,6 +163,9 @@ Result ServerManager::ManageDeferral(Kernel::KEvent** out_event) {
// Initialize the event.
m_deferral_event->Initialize(nullptr);
// Register the event.
Kernel::KEvent::Register(m_system.Kernel(), m_deferral_event);
// Set the output.
*out_event = m_deferral_event;

View File

@@ -64,6 +64,9 @@ Result ServiceManager::RegisterService(std::string name, u32 max_sessions,
auto* port = Kernel::KPort::Create(kernel);
port->Initialize(ServerSessionCountMax, false, 0);
// Register the port.
Kernel::KPort::Register(kernel, port);
service_ports.emplace(name, port);
registered_services.emplace(name, handler);
if (deferral_event) {

View File

@@ -49,6 +49,9 @@ void Controller::CloneCurrentObject(HLERequestContext& ctx) {
// Commit the session reservation.
session_reservation.Commit();
// Register the session.
Kernel::KSession::Register(system.Kernel(), session);
// Register with server manager.
session_manager->GetServerManager().RegisterSession(&session->GetServerSession(),
session_manager);

View File

@@ -462,7 +462,7 @@ struct Memory::Impl {
}
if (Settings::IsFastmemEnabled()) {
const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
}

View File

@@ -473,7 +473,8 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
}
void EmitSetSampleMask(EmitContext& ctx, Id value) {
ctx.OpStore(ctx.sample_mask, value);
const Id pointer{ctx.OpAccessChain(ctx.output_u32, ctx.sample_mask, ctx.u32_zero_value)};
ctx.OpStore(pointer, value);
}
void EmitSetFragDepth(EmitContext& ctx, Id value) {

View File

@@ -1572,7 +1572,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
}
if (info.stores_sample_mask) {
sample_mask = DefineOutput(*this, U32[1], std::nullopt);
const Id array_type{TypeArray(U32[1], Const(1U))};
sample_mask = DefineOutput(*this, array_type, std::nullopt);
Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask);
}
break;

View File

@@ -179,6 +179,8 @@ add_library(video_core STATIC
renderer_vulkan/vk_master_semaphore.h
renderer_vulkan/vk_pipeline_cache.cpp
renderer_vulkan/vk_pipeline_cache.h
renderer_vulkan/vk_present_manager.cpp
renderer_vulkan/vk_present_manager.h
renderer_vulkan/vk_query_cache.cpp
renderer_vulkan/vk_query_cache.h
renderer_vulkan/vk_rasterizer.cpp

View File

@@ -1426,7 +1426,7 @@ bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr,
.size = sub_size,
});
total_size_bytes += sub_size;
largest_copy = std::max(largest_copy, sub_size);
largest_copy = std::max<u64>(largest_copy, sub_size);
}
const std::span<BufferCopy> copies_span(copies.data(), copies.size());
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);

View File

@@ -170,7 +170,8 @@ private:
std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
while (remaining_size > 0) {
const std::size_t copy_amount{std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
const std::size_t copy_amount{
std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
auto* manager{top_tier[page_index]};
if (manager) {
if constexpr (BOOL_BREAK) {
@@ -206,7 +207,8 @@ private:
u64 begin = std::numeric_limits<u64>::max();
u64 end = 0;
while (remaining_size > 0) {
const std::size_t copy_amount{std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
const std::size_t copy_amount{
std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
auto* manager{top_tier[page_index]};
const auto execute = [&] {
auto [new_begin, new_end] = func(manager, page_offset, copy_amount);

View File

@@ -126,15 +126,14 @@ constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{
PixelFormat::ASTC_2D_8X8_SRGB,
};
// Missing formats:
// PixelFormat::ASTC_2D_10X5_UNORM
// PixelFormat::ASTC_2D_10X5_SRGB
// Missing formats:
// PixelFormat::ASTC_2D_10X6_SRGB
constexpr std::array VIEW_CLASS_ASTC_10x5_RGBA{
PixelFormat::ASTC_2D_10X5_UNORM,
PixelFormat::ASTC_2D_10X5_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_10x6_RGBA{
PixelFormat::ASTC_2D_10X6_UNORM,
PixelFormat::ASTC_2D_10X6_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{
@@ -147,9 +146,10 @@ constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{
PixelFormat::ASTC_2D_10X10_SRGB,
};
// Missing formats
// ASTC_2D_12X10_UNORM,
// ASTC_2D_12X10_SRGB,
constexpr std::array VIEW_CLASS_ASTC_12x10_RGBA{
PixelFormat::ASTC_2D_12X10_UNORM,
PixelFormat::ASTC_2D_12X10_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{
PixelFormat::ASTC_2D_12X12_UNORM,
@@ -229,9 +229,11 @@ constexpr Table MakeViewTable() {
EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_10x5_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_10x6_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_12x10_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
return view;
}

View File

@@ -4,13 +4,20 @@
#pragma once
#include <algorithm>
#include <condition_variable>
#include <cstring>
#include <deque>
#include <functional>
#include <memory>
#include <mutex>
#include <thread>
#include <queue>
#include "common/common_types.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "common/thread.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/gpu.h"
#include "video_core/host1x/host1x.h"
@@ -23,15 +30,26 @@ class FenceBase {
public:
explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {}
bool IsStubbed() const {
return is_stubbed;
}
protected:
bool is_stubbed;
};
template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
template <typename Traits>
class FenceManager {
using TFence = typename Traits::FenceType;
using TTextureCache = typename Traits::TextureCacheType;
using TBufferCache = typename Traits::BufferCacheType;
using TQueryCache = typename Traits::QueryCacheType;
static constexpr bool can_async_check = Traits::HAS_ASYNC_CHECK;
public:
/// Notify the fence manager about a new frame
void TickFrame() {
std::unique_lock lock(ring_guard);
delayed_destruction_ring.Tick();
}
@@ -46,17 +64,33 @@ public:
}
void SignalFence(std::function<void()>&& func) {
TryReleasePendingFences();
rasterizer.InvalidateGPUCache();
bool delay_fence = Settings::IsGPULevelHigh();
if constexpr (!can_async_check) {
TryReleasePendingFences<false>();
}
const bool should_flush = ShouldFlush();
CommitAsyncFlushes();
uncommitted_operations.emplace_back(std::move(func));
CommitOperations();
TFence new_fence = CreateFence(!should_flush);
fences.push(new_fence);
if constexpr (can_async_check) {
guard.lock();
}
if (delay_fence) {
uncommitted_operations.emplace_back(std::move(func));
}
pending_operations.emplace_back(std::move(uncommitted_operations));
QueueFence(new_fence);
if (!delay_fence) {
func();
}
fences.push(std::move(new_fence));
if (should_flush) {
rasterizer.FlushCommands();
}
if constexpr (can_async_check) {
guard.unlock();
cv.notify_all();
}
}
void SignalSyncPoint(u32 value) {
@@ -66,29 +100,30 @@ public:
}
void WaitPendingFences() {
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait()) {
WaitFence(current_fence);
}
PopAsyncFlushes();
auto operations = std::move(pending_operations.front());
pending_operations.pop_front();
for (auto& operation : operations) {
operation();
}
PopFence();
if constexpr (!can_async_check) {
TryReleasePendingFences<true>();
}
}
protected:
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
TTextureCache& texture_cache_, TBufferCache& buffer_cache_,
TQueryCache& query_cache_)
: rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()},
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {
if constexpr (can_async_check) {
fence_thread =
std::jthread([this](std::stop_token token) { ReleaseThreadFunc(token); });
}
}
virtual ~FenceManager() = default;
virtual ~FenceManager() {
if constexpr (can_async_check) {
fence_thread.request_stop();
cv.notify_all();
fence_thread.join();
}
}
/// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is
/// true
@@ -104,15 +139,20 @@ protected:
Tegra::GPU& gpu;
Tegra::Host1x::SyncpointManager& syncpoint_manager;
TTextureCache& texture_cache;
TTBufferCache& buffer_cache;
TBufferCache& buffer_cache;
TQueryCache& query_cache;
private:
template <bool force_wait>
void TryReleasePendingFences() {
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait() && !IsFenceSignaled(current_fence)) {
return;
if constexpr (force_wait) {
WaitFence(current_fence);
} else {
return;
}
}
PopAsyncFlushes();
auto operations = std::move(pending_operations.front());
@@ -120,7 +160,49 @@ private:
for (auto& operation : operations) {
operation();
}
PopFence();
{
std::unique_lock lock(ring_guard);
delayed_destruction_ring.Push(std::move(current_fence));
}
fences.pop();
}
}
void ReleaseThreadFunc(std::stop_token stop_token) {
std::string name = "GPUFencingThread";
MicroProfileOnThreadCreate(name.c_str());
// Cleanup
SCOPE_EXIT({ MicroProfileOnThreadExit(); });
Common::SetCurrentThreadName(name.c_str());
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
TFence current_fence;
std::deque<std::function<void()>> current_operations;
while (!stop_token.stop_requested()) {
{
std::unique_lock lock(guard);
cv.wait(lock, [&] { return stop_token.stop_requested() || !fences.empty(); });
if (stop_token.stop_requested()) [[unlikely]] {
return;
}
current_fence = std::move(fences.front());
current_operations = std::move(pending_operations.front());
fences.pop();
pending_operations.pop_front();
}
if (!current_fence->IsStubbed()) {
WaitFence(current_fence);
}
PopAsyncFlushes();
for (auto& operation : current_operations) {
operation();
}
{
std::unique_lock lock(ring_guard);
delayed_destruction_ring.Push(std::move(current_fence));
}
}
}
@@ -154,19 +236,16 @@ private:
query_cache.CommitAsyncFlushes();
}
void PopFence() {
delayed_destruction_ring.Push(std::move(fences.front()));
fences.pop();
}
void CommitOperations() {
pending_operations.emplace_back(std::move(uncommitted_operations));
}
std::queue<TFence> fences;
std::deque<std::function<void()>> uncommitted_operations;
std::deque<std::deque<std::function<void()>>> pending_operations;
std::mutex guard;
std::mutex ring_guard;
std::condition_variable cv;
std::jthread fence_thread;
DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
};

View File

@@ -82,6 +82,7 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
}
PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const {
std::unique_lock<std::mutex> lock(guard);
return kind_map.GetValueAt(gpu_addr);
}
@@ -160,7 +161,10 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
}
remaining_size -= big_page_size;
}
kind_map.Map(gpu_addr, gpu_addr + size, kind);
{
std::unique_lock<std::mutex> lock(guard);
kind_map.Map(gpu_addr, gpu_addr + size, kind);
}
return gpu_addr;
}
@@ -553,6 +557,7 @@ size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const {
}
size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
std::unique_lock<std::mutex> lock(guard);
return kind_map.GetContinuousSizeFrom(gpu_addr);
}
@@ -745,10 +750,10 @@ void MemoryManager::FlushCaching() {
return;
}
accumulator->Callback([this](GPUVAddr addr, size_t size) {
GetSubmappedRangeImpl<false>(addr, size, page_stash);
GetSubmappedRangeImpl<false>(addr, size, page_stash2);
});
rasterizer->InnerInvalidation(page_stash);
page_stash.clear();
rasterizer->InnerInvalidation(page_stash2);
page_stash2.clear();
accumulator->Clear();
}

View File

@@ -5,6 +5,7 @@
#include <atomic>
#include <map>
#include <mutex>
#include <optional>
#include <vector>
@@ -215,6 +216,9 @@ private:
std::vector<u64> big_page_continuous;
std::vector<std::pair<VAddr, std::size_t>> page_stash{};
std::vector<std::pair<VAddr, std::size_t>> page_stash2{};
mutable std::mutex guard;
static constexpr size_t continuous_bits = 64;

View File

@@ -6,6 +6,7 @@
#include <algorithm>
#include <array>
#include <cstring>
#include <functional>
#include <iterator>
#include <list>
#include <memory>
@@ -17,13 +18,19 @@
#include "common/assert.h"
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/texture_cache/slot_vector.h"
namespace VideoCommon {
using AsyncJobId = SlotId;
static constexpr AsyncJobId NULL_ASYNC_JOB_ID{0};
template <class QueryCache, class HostCounter>
class CounterStreamBase {
public:
@@ -93,9 +100,13 @@ private:
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_)
: rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {}
explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_)
: rasterizer{rasterizer_},
cpu_memory{cpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {
(void)slot_async_jobs.insert(); // Null value
}
void InvalidateRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex};
@@ -126,10 +137,15 @@ public:
query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
}
query->BindCounter(Stream(type).Current(), timestamp);
if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
AsyncFlushQuery(*cpu_addr);
auto result = query->BindCounter(Stream(type).Current(), timestamp);
if (result) {
auto async_job_id = query->GetAsyncJob();
auto& async_job = slot_async_jobs[async_job_id];
async_job.collected = true;
async_job.value = *result;
query->SetAsyncJob(NULL_ASYNC_JOB_ID);
}
AsyncFlushQuery(query, timestamp, lock);
}
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
@@ -173,15 +189,18 @@ public:
}
void CommitAsyncFlushes() {
std::unique_lock lock{mutex};
committed_flushes.push_back(uncommitted_flushes);
uncommitted_flushes.reset();
}
bool HasUncommittedFlushes() const {
std::unique_lock lock{mutex};
return uncommitted_flushes != nullptr;
}
bool ShouldWaitAsyncFlushes() const {
std::unique_lock lock{mutex};
if (committed_flushes.empty()) {
return false;
}
@@ -189,6 +208,7 @@ public:
}
void PopAsyncFlushes() {
std::unique_lock lock{mutex};
if (committed_flushes.empty()) {
return;
}
@@ -197,15 +217,25 @@ public:
committed_flushes.pop_front();
return;
}
for (VAddr query_address : *flush_list) {
FlushAndRemoveRegion(query_address, 4);
for (AsyncJobId async_job_id : *flush_list) {
AsyncJob& async_job = slot_async_jobs[async_job_id];
if (!async_job.collected) {
FlushAndRemoveRegion(async_job.query_location, 2, true);
}
}
committed_flushes.pop_front();
}
private:
struct AsyncJob {
bool collected = false;
u64 value = 0;
VAddr query_location = 0;
std::optional<u64> timestamp{};
};
/// Flushes a memory range to guest memory and removes it from the cache.
void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
void FlushAndRemoveRegion(VAddr addr, std::size_t size, bool async = false) {
const u64 addr_begin = addr;
const u64 addr_end = addr_begin + size;
const auto in_range = [addr_begin, addr_end](const CachedQuery& query) {
@@ -226,7 +256,16 @@ private:
continue;
}
rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
query.Flush();
AsyncJobId async_job_id = query.GetAsyncJob();
auto flush_result = query.Flush(async);
if (async_job_id == NULL_ASYNC_JOB_ID) {
ASSERT_MSG(false, "This should not be reachable at all");
continue;
}
AsyncJob& async_job = slot_async_jobs[async_job_id];
async_job.collected = true;
async_job.value = flush_result;
query.SetAsyncJob(NULL_ASYNC_JOB_ID);
}
std::erase_if(contents, in_range);
}
@@ -253,26 +292,60 @@ private:
return found != std::end(contents) ? &*found : nullptr;
}
void AsyncFlushQuery(VAddr addr) {
if (!uncommitted_flushes) {
uncommitted_flushes = std::make_shared<std::vector<VAddr>>();
void AsyncFlushQuery(CachedQuery* query, std::optional<u64> timestamp,
std::unique_lock<std::recursive_mutex>& lock) {
const AsyncJobId new_async_job_id = slot_async_jobs.insert();
{
AsyncJob& async_job = slot_async_jobs[new_async_job_id];
query->SetAsyncJob(new_async_job_id);
async_job.query_location = query->GetCpuAddr();
async_job.collected = false;
if (!uncommitted_flushes) {
uncommitted_flushes = std::make_shared<std::vector<AsyncJobId>>();
}
uncommitted_flushes->push_back(new_async_job_id);
}
uncommitted_flushes->push_back(addr);
lock.unlock();
std::function<void()> operation([this, new_async_job_id, timestamp] {
std::unique_lock local_lock{mutex};
AsyncJob& async_job = slot_async_jobs[new_async_job_id];
u64 value = async_job.value;
VAddr address = async_job.query_location;
slot_async_jobs.erase(new_async_job_id);
local_lock.unlock();
if (timestamp) {
u64 timestamp_value = *timestamp;
cpu_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64));
cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64));
rasterizer.InvalidateRegion(address, sizeof(u64) * 2,
VideoCommon::CacheType::NoQueryCache);
} else {
u32 small_value = static_cast<u32>(value);
cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32));
rasterizer.InvalidateRegion(address, sizeof(u32),
VideoCommon::CacheType::NoQueryCache);
}
});
rasterizer.SyncOperation(std::move(operation));
}
static constexpr std::uintptr_t YUZU_PAGESIZE = 4096;
static constexpr unsigned YUZU_PAGEBITS = 12;
VideoCore::RasterizerInterface& rasterizer;
SlotVector<AsyncJob> slot_async_jobs;
std::recursive_mutex mutex;
VideoCore::RasterizerInterface& rasterizer;
Core::Memory::Memory& cpu_memory;
mutable std::recursive_mutex mutex;
std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
std::shared_ptr<std::vector<VAddr>> uncommitted_flushes{};
std::list<std::shared_ptr<std::vector<VAddr>>> committed_flushes;
std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{};
std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes;
};
template <class QueryCache, class HostCounter>
@@ -291,12 +364,12 @@ public:
virtual ~HostCounterBase() = default;
/// Returns the current value of the query.
u64 Query() {
u64 Query(bool async = false) {
if (result) {
return *result;
}
u64 value = BlockingQuery() + base_result;
u64 value = BlockingQuery(async) + base_result;
if (dependency) {
value += dependency->Query();
dependency = nullptr;
@@ -317,7 +390,7 @@ public:
protected:
/// Returns the value of query from the backend API blocking as needed.
virtual u64 BlockingQuery() const = 0;
virtual u64 BlockingQuery(bool async = false) const = 0;
private:
std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
@@ -340,26 +413,33 @@ public:
CachedQueryBase& operator=(const CachedQueryBase&) = delete;
/// Flushes the query to guest memory.
virtual void Flush() {
virtual u64 Flush(bool async = false) {
// When counter is nullptr it means that it's just been reset. We are supposed to write a
// zero in these cases.
const u64 value = counter ? counter->Query() : 0;
const u64 value = counter ? counter->Query(async) : 0;
if (async) {
return value;
}
std::memcpy(host_ptr, &value, sizeof(u64));
if (timestamp) {
std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
}
return value;
}
/// Binds a counter to this query.
void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
std::optional<u64> BindCounter(std::shared_ptr<HostCounter> counter_,
std::optional<u64> timestamp_) {
std::optional<u64> result{};
if (counter) {
// If there's an old counter set it means the query is being rewritten by the game.
// To avoid losing the data forever, flush here.
Flush();
result = std::make_optional(Flush());
}
counter = std::move(counter_);
timestamp = timestamp_;
return result;
}
VAddr GetCpuAddr() const noexcept {
@@ -374,6 +454,14 @@ public:
return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
}
void SetAsyncJob(AsyncJobId assigned_async_job_) {
assigned_async_job = assigned_async_job_;
}
AsyncJobId GetAsyncJob() const {
return assigned_async_job;
}
protected:
/// Returns true when querying the counter may potentially block.
bool WaitPending() const noexcept {
@@ -389,6 +477,7 @@ private:
u8* host_ptr; ///< Writable host pointer.
std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
AsyncJobId assigned_async_job;
};
} // namespace VideoCommon

View File

@@ -108,7 +108,8 @@ bool IsASTCSupported() {
[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
return nsight || HasExtension(extensions, "GL_EXT_debug_tool") ||
Settings::values.renderer_debug.GetValue();
}
} // Anonymous namespace

View File

@@ -30,7 +30,17 @@ private:
};
using Fence = std::shared_ptr<GLInnerFence>;
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
struct FenceManagerParams {
using FenceType = Fence;
using BufferCacheType = BufferCache;
using TextureCacheType = TextureCache;
using QueryCacheType = QueryCache;
static constexpr bool HAS_ASYNC_CHECK = false;
};
using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:

View File

@@ -26,8 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
: QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
: QueryCacheBase(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {}
QueryCache::~QueryCache() = default;
@@ -74,7 +74,7 @@ void HostCounter::EndQuery() {
glEndQuery(GetTarget(type));
}
u64 HostCounter::BlockingQuery() const {
u64 HostCounter::BlockingQuery([[maybe_unused]] bool async) const {
GLint64 value;
glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
return static_cast<u64>(value);
@@ -96,7 +96,7 @@ CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
return *this;
}
void CachedQuery::Flush() {
u64 CachedQuery::Flush([[maybe_unused]] bool async) {
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
// To avoid this disable and re-enable keeping the dependency stream.
// But we only have to do this if we have pending waits to be done.
@@ -106,11 +106,13 @@ void CachedQuery::Flush() {
stream.Update(false);
}
VideoCommon::CachedQueryBase<HostCounter>::Flush();
auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
if (slice_counter) {
stream.Update(true);
}
return result;
}
} // namespace OpenGL

View File

@@ -28,7 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(RasterizerOpenGL& rasterizer_);
explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
@@ -51,7 +51,7 @@ public:
void EndQuery();
private:
u64 BlockingQuery() const override;
u64 BlockingQuery(bool async = false) const override;
QueryCache& cache;
const VideoCore::QueryType type;
@@ -70,7 +70,7 @@ public:
CachedQuery(const CachedQuery&) = delete;
CachedQuery& operator=(const CachedQuery&) = delete;
void Flush() override;
u64 Flush(bool async = false) override;
private:
QueryCache* cache;

View File

@@ -63,7 +63,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
state_tracker, gpu.ShaderNotify()),
query_cache(*this), accelerate_dma(buffer_cache, texture_cache),
query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
blit_image(program_manager_) {}

View File

@@ -861,9 +861,12 @@ GLuint Image::StorageHandle() noexcept {
case PixelFormat::ASTC_2D_8X5_SRGB:
case PixelFormat::ASTC_2D_5X4_SRGB:
case PixelFormat::ASTC_2D_5X5_SRGB:
case PixelFormat::ASTC_2D_10X5_SRGB:
case PixelFormat::ASTC_2D_10X6_SRGB:
case PixelFormat::ASTC_2D_10X8_SRGB:
case PixelFormat::ASTC_2D_6X6_SRGB:
case PixelFormat::ASTC_2D_10X10_SRGB:
case PixelFormat::ASTC_2D_12X10_SRGB:
case PixelFormat::ASTC_2D_12X12_SRGB:
case PixelFormat::ASTC_2D_8X6_SRGB:
case PixelFormat::ASTC_2D_6X5_SRGB:
@@ -1123,7 +1126,8 @@ bool Image::ScaleDown(bool ignore) {
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image, const SlotVector<Image>&)
: VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
: VideoCommon::ImageViewBase{info, image.info, image_id_, image.gpu_addr},
views{runtime.null_image_views} {
const Device& device = runtime.device;
if (True(image.flags & ImageFlagBits::Converted)) {
internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
@@ -1214,12 +1218,12 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
: VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
: VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info)
: VideoCommon::ImageViewBase{info, view_info} {}
: VideoCommon::ImageViewBase{info, view_info, 0} {}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params)
: VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
@@ -1279,7 +1283,7 @@ GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) {
ApplySwizzle(view.handle, format, casted_swizzle);
}
if (set_object_label) {
const std::string name = VideoCommon::Name(*this);
const std::string name = VideoCommon::Name(*this, gpu_addr);
glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data());
}
return view.handle;

View File

@@ -314,7 +314,6 @@ private:
std::unique_ptr<StorageViews> storage_views;
GLenum internal_format = GL_NONE;
GLuint default_handle = 0;
GPUVAddr gpu_addr = 0;
u32 buffer_size = 0;
GLuint original_texture = 0;
int num_samples = 0;

View File

@@ -100,10 +100,13 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
{GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x6_KHR}, // ASTC_2D_10X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR}, // ASTC_2D_10X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x5_KHR}, // ASTC_2D_10X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR}, // ASTC_2D_10X5_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
{GL_COMPRESSED_RGBA_ASTC_12x10_KHR}, // ASTC_2D_12X10_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR}, // ASTC_2D_12X10_SRGB
{GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
{GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM

View File

@@ -197,10 +197,13 @@ struct FormatTuple {
{VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM
{VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB
{VK_FORMAT_ASTC_10x6_UNORM_BLOCK}, // ASTC_2D_10X6_UNORM
{VK_FORMAT_ASTC_10x6_SRGB_BLOCK}, // ASTC_2D_10X6_SRGB
{VK_FORMAT_ASTC_10x5_UNORM_BLOCK}, // ASTC_2D_10X5_UNORM
{VK_FORMAT_ASTC_10x5_SRGB_BLOCK}, // ASTC_2D_10X5_SRGB
{VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM
{VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB
{VK_FORMAT_ASTC_12x10_UNORM_BLOCK}, // ASTC_2D_12X10_UNORM
{VK_FORMAT_ASTC_12x10_SRGB_BLOCK}, // ASTC_2D_12X10_SRGB
{VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM
{VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB
{VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM

View File

@@ -93,8 +93,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
state_tracker(), scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
screen_info),
present_manager(render_window, device, memory_allocator, scheduler, swapchain),
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager,
scheduler, screen_info),
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
state_tracker, scheduler) {
if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
@@ -121,46 +122,19 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
return;
}
// Update screen info if the framebuffer size has changed.
if (screen_info.width != framebuffer->width || screen_info.height != framebuffer->height) {
screen_info.width = framebuffer->width;
screen_info.height = framebuffer->height;
}
screen_info.width = framebuffer->width;
screen_info.height = framebuffer->height;
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
RenderScreenshot(*framebuffer, use_accelerated);
bool has_been_recreated = false;
const auto recreate_swapchain = [&](u32 width, u32 height) {
if (!has_been_recreated) {
has_been_recreated = true;
scheduler.Finish();
}
swapchain.Create(width, height, is_srgb);
};
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
if (swapchain.NeedsRecreation(is_srgb) || swapchain.GetWidth() != layout.width ||
swapchain.GetHeight() != layout.height) {
recreate_swapchain(layout.width, layout.height);
}
bool is_outdated;
do {
swapchain.AcquireNextImage();
is_outdated = swapchain.IsOutDated();
if (is_outdated) {
recreate_swapchain(layout.width, layout.height);
}
} while (is_outdated);
if (has_been_recreated) {
blit_screen.Recreate();
}
const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
scheduler.Flush(render_semaphore, present_semaphore);
scheduler.WaitWorker();
swapchain.Present(render_semaphore);
Frame* frame = present_manager.GetRenderFrame();
blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
scheduler.Flush(*frame->render_ready);
present_manager.Present(frame);
gpu.RendererFrameEndNotify();
rasterizer.TickFrame();
@@ -246,8 +220,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
});
const VkExtent2D render_area{.width = layout.width, .height = layout.height};
const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
// Since we're not rendering to the screen, ignore the render semaphore.
void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));
blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated);
const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4);
const VkBufferCreateInfo dst_buffer_info{
@@ -270,7 +243,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,

View File

@@ -9,6 +9,7 @@
#include "common/dynamic_library.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_present_manager.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
@@ -76,6 +77,7 @@ private:
StateTracker state_tracker;
Scheduler scheduler;
Swapchain swapchain;
PresentManager present_manager;
BlitScreen blit_screen;
RasterizerVulkan rasterizer;
std::optional<TurboMode> turbo_mode;

View File

@@ -122,10 +122,12 @@ struct BlitScreen::BufferData {
BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_,
const Device& device_, MemoryAllocator& memory_allocator_,
Swapchain& swapchain_, Scheduler& scheduler_, const ScreenInfo& screen_info_)
Swapchain& swapchain_, PresentManager& present_manager_,
Scheduler& scheduler_, const ScreenInfo& screen_info_)
: cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_},
scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_},
current_srgb{swapchain.IsSrgb()}, image_view_format{swapchain.GetImageViewFormat()} {
resource_ticks.resize(image_count);
CreateStaticResources();
@@ -135,25 +137,20 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin
BlitScreen::~BlitScreen() = default;
void BlitScreen::Recreate() {
present_manager.WaitPresent();
scheduler.Finish();
device.GetLogical().WaitIdle();
CreateDynamicResources();
}
VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area,
bool use_accelerated) {
void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer, const Layout::FramebufferLayout layout,
VkExtent2D render_area, bool use_accelerated) {
RefreshResources(framebuffer);
// Finish any pending renderpass
scheduler.RequestOutsideRenderPassOperationContext();
if (const auto swapchain_images = swapchain.GetImageCount(); swapchain_images != image_count) {
image_count = swapchain_images;
Recreate();
}
const std::size_t image_index = swapchain.GetImageIndex();
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
@@ -169,7 +166,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
std::memcpy(mapped_span.data(), &data, sizeof(data));
if (!use_accelerated) {
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
const u64 image_offset = GetRawImageOffset(framebuffer);
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
@@ -204,8 +201,8 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
.depth = 1,
},
};
scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) {
const VkImage image = *raw_images[image_index];
scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) {
const VkImage image = *raw_images[index];
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -245,14 +242,15 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue();
if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) {
UpdateAADescriptorSet(image_index, source_image_view, false);
UpdateAADescriptorSet(source_image_view, false);
const u32 up_scale = Settings::values.resolution_info.up_scale;
const u32 down_shift = Settings::values.resolution_info.down_shift;
VkExtent2D size{
.width = (up_scale * framebuffer.width) >> down_shift,
.height = (up_scale * framebuffer.height) >> down_shift,
};
scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) {
scheduler.Record([this, index = image_index, size,
anti_alias_pass](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -326,7 +324,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0,
aa_descriptor_sets[image_index], {});
aa_descriptor_sets[index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
@@ -369,81 +367,99 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
};
VkImageView fsr_image_view =
fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
UpdateDescriptorSet(image_index, fsr_image_view, true);
UpdateDescriptorSet(fsr_image_view, true);
} else {
const bool is_nn =
Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor;
UpdateDescriptorSet(image_index, source_image_view, is_nn);
UpdateDescriptorSet(source_image_view, is_nn);
}
scheduler.Record(
[this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
const VkClearValue clear_color{
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
};
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = *renderpass,
.framebuffer = host_framebuffer,
.renderArea =
{
.offset = {0, 0},
.extent = size,
},
.clearValueCount = 1,
.pClearValues = &clear_color,
};
const VkViewport viewport{
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(size.width),
.height = static_cast<float>(size.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const VkRect2D scissor{
.offset = {0, 0},
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
auto graphics_pipeline = [this]() {
switch (Settings::values.scaling_filter.GetValue()) {
case Settings::ScalingFilter::NearestNeighbor:
case Settings::ScalingFilter::Bilinear:
return *bilinear_pipeline;
case Settings::ScalingFilter::Bicubic:
return *bicubic_pipeline;
case Settings::ScalingFilter::Gaussian:
return *gaussian_pipeline;
case Settings::ScalingFilter::ScaleForce:
return *scaleforce_pipeline;
default:
return *bilinear_pipeline;
}
}();
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
scheduler.Record([this, host_framebuffer, index = image_index,
size = render_area](vk::CommandBuffer cmdbuf) {
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
const VkClearValue clear_color{
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
};
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = *renderpass,
.framebuffer = host_framebuffer,
.renderArea =
{
.offset = {0, 0},
.extent = size,
},
.clearValueCount = 1,
.pClearValues = &clear_color,
};
const VkViewport viewport{
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(size.width),
.height = static_cast<float>(size.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const VkRect2D scissor{
.offset = {0, 0},
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
auto graphics_pipeline = [this]() {
switch (Settings::values.scaling_filter.GetValue()) {
case Settings::ScalingFilter::NearestNeighbor:
case Settings::ScalingFilter::Bilinear:
return *bilinear_pipeline;
case Settings::ScalingFilter::Bicubic:
return *bicubic_pipeline;
case Settings::ScalingFilter::Gaussian:
return *gaussian_pipeline;
case Settings::ScalingFilter::ScaleForce:
return *scaleforce_pipeline;
default:
return *bilinear_pipeline;
}
}();
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
descriptor_sets[image_index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
return *semaphores[image_index];
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
descriptor_sets[index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
}
VkSemaphore BlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated) {
const std::size_t image_index = swapchain.GetImageIndex();
const VkExtent2D render_area = swapchain.GetSize();
void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated, bool is_srgb) {
// Recreate dynamic resources if the the image count or colorspace changed
if (const std::size_t swapchain_images = swapchain.GetImageCount();
swapchain_images != image_count || current_srgb != is_srgb) {
current_srgb = is_srgb;
image_view_format = current_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
image_count = swapchain_images;
Recreate();
}
// Recreate the presentation frame if the dimensions of the window changed
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
return Draw(framebuffer, *framebuffers[image_index], layout, render_area, use_accelerated);
if (layout.width != frame->width || layout.height != frame->height ||
is_srgb != frame->is_srgb) {
Recreate();
present_manager.RecreateFrame(frame, layout.width, layout.height, is_srgb,
image_view_format, *renderpass);
}
const VkExtent2D render_area{frame->width, frame->height};
Draw(framebuffer, *frame->framebuffer, layout, render_area, use_accelerated);
if (++image_index >= image_count) {
image_index = 0;
}
}
vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
@@ -471,13 +487,11 @@ void BlitScreen::CreateStaticResources() {
}
void BlitScreen::CreateDynamicResources() {
CreateSemaphores();
CreateDescriptorPool();
CreateDescriptorSetLayout();
CreateDescriptorSets();
CreatePipelineLayout();
CreateRenderPass();
CreateFramebuffers();
CreateGraphicsPipeline();
fsr.reset();
smaa.reset();
@@ -525,11 +539,6 @@ void BlitScreen::CreateShaders() {
}
}
void BlitScreen::CreateSemaphores() {
semaphores.resize(image_count);
std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); });
}
void BlitScreen::CreateDescriptorPool() {
const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
{
@@ -571,10 +580,10 @@ void BlitScreen::CreateDescriptorPool() {
}
void BlitScreen::CreateRenderPass() {
renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat());
renderpass = CreateRenderPassImpl(image_view_format);
}
vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) {
vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format) {
const VkAttachmentDescription color_attachment{
.flags = 0,
.format = format,
@@ -584,7 +593,7 @@ vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkAttachmentReference color_attachment_ref{
@@ -1052,16 +1061,6 @@ void BlitScreen::CreateSampler() {
nn_sampler = device.GetLogical().CreateSampler(ci_nn);
}
void BlitScreen::CreateFramebuffers() {
const VkExtent2D size{swapchain.GetSize()};
framebuffers.resize(image_count);
for (std::size_t i = 0; i < image_count; ++i) {
const VkImageView image_view{swapchain.GetImageViewIndex(i)};
framebuffers[i] = CreateFramebuffer(image_view, size, renderpass);
}
}
void BlitScreen::ReleaseRawImages() {
for (const u64 tick : resource_ticks) {
scheduler.Wait(tick);
@@ -1175,7 +1174,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
return;
}
aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false);
aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer));
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{
@@ -1319,8 +1318,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci);
}
void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view,
bool nn) const {
void BlitScreen::UpdateAADescriptorSet(VkImageView image_view, bool nn) const {
const VkDescriptorImageInfo image_info{
.sampler = nn ? *nn_sampler : *sampler,
.imageView = image_view,
@@ -1356,8 +1354,7 @@ void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView imag
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {});
}
void BlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view,
bool nn) const {
void BlitScreen::UpdateDescriptorSet(VkImageView image_view, bool nn) const {
const VkDescriptorBufferInfo buffer_info{
.buffer = *buffer,
.offset = offsetof(BufferData, uniform),
@@ -1480,8 +1477,7 @@ u64 BlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer)
return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
}
u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const {
u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const {
constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData));
return first_image_offset + GetSizeInBytes(framebuffer) * image_index;
}

View File

@@ -5,6 +5,7 @@
#include <memory>
#include "core/frontend/framebuffer_layout.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -42,6 +43,9 @@ class RasterizerVulkan;
class Scheduler;
class SMAA;
class Swapchain;
class PresentManager;
struct Frame;
struct ScreenInfo {
VkImage image{};
@@ -55,18 +59,17 @@ class BlitScreen {
public:
explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window,
const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain,
Scheduler& scheduler, const ScreenInfo& screen_info);
PresentManager& present_manager, Scheduler& scheduler,
const ScreenInfo& screen_info);
~BlitScreen();
void Recreate();
[[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area,
bool use_accelerated);
void Draw(const Tegra::FramebufferConfig& framebuffer, const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area, bool use_accelerated);
[[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated);
void DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated, bool is_srgb);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent);
@@ -79,10 +82,9 @@ private:
void CreateStaticResources();
void CreateShaders();
void CreateSemaphores();
void CreateDescriptorPool();
void CreateRenderPass();
vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true);
vk::RenderPass CreateRenderPassImpl(VkFormat format);
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreatePipelineLayout();
@@ -90,15 +92,14 @@ private:
void CreateSampler();
void CreateDynamicResources();
void CreateFramebuffers();
void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
void ReleaseRawImages();
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
void UpdateDescriptorSet(VkImageView image_view, bool nn) const;
void UpdateAADescriptorSet(VkImageView image_view, bool nn) const;
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout layout) const;
@@ -107,16 +108,17 @@ private:
void CreateFSR();
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const;
Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window;
const Device& device;
MemoryAllocator& memory_allocator;
Swapchain& swapchain;
PresentManager& present_manager;
Scheduler& scheduler;
std::size_t image_count;
std::size_t image_index{};
const ScreenInfo& screen_info;
vk::ShaderModule vertex_shader;
@@ -135,7 +137,6 @@ private:
vk::Pipeline gaussian_pipeline;
vk::Pipeline scaleforce_pipeline;
vk::RenderPass renderpass;
std::vector<vk::Framebuffer> framebuffers;
vk::DescriptorSets descriptor_sets;
vk::Sampler nn_sampler;
vk::Sampler sampler;
@@ -145,7 +146,6 @@ private:
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> semaphores;
std::vector<vk::Image> raw_images;
std::vector<vk::ImageView> raw_image_views;
std::vector<MemoryCommit> raw_buffer_commits;
@@ -164,6 +164,8 @@ private:
u32 raw_width = 0;
u32 raw_height = 0;
Service::android::PixelFormat pixel_format{};
bool current_srgb;
VkFormat image_view_format;
std::unique_ptr<FSR> fsr;
std::unique_ptr<SMAA> smaa;

View File

@@ -5,6 +5,7 @@
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"

View File

@@ -40,7 +40,16 @@ private:
};
using Fence = std::shared_ptr<InnerFence>;
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
struct FenceManagerParams {
using FenceType = Fence;
using BufferCacheType = BufferCache;
using TextureCacheType = TextureCache;
using QueryCacheType = QueryCache;
static constexpr bool HAS_ASYNC_CHECK = true;
};
using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
class FenceManager final : public GenericFenceManager {
public:

View File

@@ -0,0 +1,457 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/microprofile.h"
#include "common/settings.h"
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_present_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128));
MICROPROFILE_DEFINE(Vulkan_CopyToSwapchain, "Vulkan", "Copy to swapchain", MP_RGB(192, 255, 192));
namespace {
bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, VkFormat format) {
const VkFormatProperties props{physical_device.GetFormatProperties(format)};
return (props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT);
}
[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers() {
return VkImageSubresourceLayers{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
};
}
[[nodiscard]] VkImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width,
s32 swapchain_height) {
return VkImageBlit{
.srcSubresource = MakeImageSubresourceLayers(),
.srcOffsets =
{
{
.x = 0,
.y = 0,
.z = 0,
},
{
.x = frame_width,
.y = frame_height,
.z = 1,
},
},
.dstSubresource = MakeImageSubresourceLayers(),
.dstOffsets =
{
{
.x = 0,
.y = 0,
.z = 0,
},
{
.x = swapchain_width,
.y = swapchain_height,
.z = 1,
},
},
};
}
[[nodiscard]] VkImageCopy MakeImageCopy(u32 frame_width, u32 frame_height, u32 swapchain_width,
u32 swapchain_height) {
return VkImageCopy{
.srcSubresource = MakeImageSubresourceLayers(),
.srcOffset =
{
.x = 0,
.y = 0,
.z = 0,
},
.dstSubresource = MakeImageSubresourceLayers(),
.dstOffset =
{
.x = 0,
.y = 0,
.z = 0,
},
.extent =
{
.width = std::min(frame_width, swapchain_width),
.height = std::min(frame_height, swapchain_height),
.depth = 1,
},
};
}
} // Anonymous namespace
PresentManager::PresentManager(Core::Frontend::EmuWindow& render_window_, const Device& device_,
MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
Swapchain& swapchain_)
: render_window{render_window_}, device{device_},
memory_allocator{memory_allocator_}, scheduler{scheduler_}, swapchain{swapchain_},
blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())},
use_present_thread{Settings::values.async_presentation.GetValue()},
image_count{swapchain.GetImageCount()} {
auto& dld = device.GetLogical();
cmdpool = dld.CreateCommandPool({
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.pNext = nullptr,
.flags =
VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
.queueFamilyIndex = device.GetGraphicsFamily(),
});
auto cmdbuffers = cmdpool.Allocate(image_count);
frames.resize(image_count);
for (u32 i = 0; i < frames.size(); i++) {
Frame& frame = frames[i];
frame.cmdbuf = vk::CommandBuffer{cmdbuffers[i], device.GetDispatchLoader()};
frame.render_ready = dld.CreateSemaphore({
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
});
frame.present_done = dld.CreateFence({
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_FENCE_CREATE_SIGNALED_BIT,
});
free_queue.push(&frame);
}
if (use_present_thread) {
present_thread = std::jthread([this](std::stop_token token) { PresentThread(token); });
}
}
PresentManager::~PresentManager() = default;
Frame* PresentManager::GetRenderFrame() {
MICROPROFILE_SCOPE(Vulkan_WaitPresent);
// Wait for free presentation frames
std::unique_lock lock{free_mutex};
free_cv.wait(lock, [this] { return !free_queue.empty(); });
// Take the frame from the queue
Frame* frame = free_queue.front();
free_queue.pop();
// Wait for the presentation to be finished so all frame resources are free
frame->present_done.Wait();
frame->present_done.Reset();
return frame;
}
void PresentManager::Present(Frame* frame) {
if (!use_present_thread) {
scheduler.WaitWorker();
CopyToSwapchain(frame);
free_queue.push(frame);
return;
}
scheduler.Record([this, frame](vk::CommandBuffer) {
std::unique_lock lock{queue_mutex};
present_queue.push(frame);
frame_cv.notify_one();
});
}
void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb,
VkFormat image_view_format, VkRenderPass rd) {
auto& dld = device.GetLogical();
frame->width = width;
frame->height = height;
frame->is_srgb = is_srgb;
frame->image = dld.CreateImage({
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
.imageType = VK_IMAGE_TYPE_2D,
.format = swapchain.GetImageFormat(),
.extent =
{
.width = width,
.height = height,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
frame->image_commit = memory_allocator.Commit(frame->image, MemoryUsage::DeviceLocal);
frame->image_view = dld.CreateImageView({
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *frame->image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = image_view_format,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
});
const VkImageView image_view{*frame->image_view};
frame->framebuffer = dld.CreateFramebuffer({
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.renderPass = rd,
.attachmentCount = 1,
.pAttachments = &image_view,
.width = width,
.height = height,
.layers = 1,
});
}
void PresentManager::WaitPresent() {
if (!use_present_thread) {
return;
}
// Wait for the present queue to be empty
{
std::unique_lock queue_lock{queue_mutex};
frame_cv.wait(queue_lock, [this] { return present_queue.empty(); });
}
// The above condition will be satisfied when the last frame is taken from the queue.
// To ensure that frame has been presented as well take hold of the swapchain
// mutex.
std::scoped_lock swapchain_lock{swapchain_mutex};
}
void PresentManager::PresentThread(std::stop_token token) {
Common::SetCurrentThreadName("VulkanPresent");
while (!token.stop_requested()) {
std::unique_lock lock{queue_mutex};
// Wait for presentation frames
Common::CondvarWait(frame_cv, lock, token, [this] { return !present_queue.empty(); });
if (token.stop_requested()) {
return;
}
// Take the frame and notify anyone waiting
Frame* frame = present_queue.front();
present_queue.pop();
frame_cv.notify_one();
// By exchanging the lock ownership we take the swapchain lock
// before the queue lock goes out of scope. This way the swapchain
// lock in WaitPresent is guaranteed to occur after here.
std::exchange(lock, std::unique_lock{swapchain_mutex});
CopyToSwapchain(frame);
// Free the frame for reuse
std::scoped_lock fl{free_mutex};
free_queue.push(frame);
free_cv.notify_one();
}
}
void PresentManager::CopyToSwapchain(Frame* frame) {
MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain);
const auto recreate_swapchain = [&] {
swapchain.Create(frame->width, frame->height, frame->is_srgb);
image_count = swapchain.GetImageCount();
};
// If the size or colorspace of the incoming frames has changed, recreate the swapchain
// to account for that.
const bool srgb_changed = swapchain.NeedsRecreation(frame->is_srgb);
const bool size_changed =
swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height;
if (srgb_changed || size_changed) {
recreate_swapchain();
}
while (swapchain.AcquireNextImage()) {
recreate_swapchain();
}
const vk::CommandBuffer cmdbuf{frame->cmdbuf};
cmdbuf.Begin({
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.pNext = nullptr,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
.pInheritanceInfo = nullptr,
});
const VkImage image{swapchain.CurrentImage()};
const VkExtent2D extent = swapchain.GetExtent();
const std::array pre_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *frame->image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
};
const std::array post_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *frame->image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, {},
{}, {}, pre_barriers);
if (blit_supported) {
cmdbuf.BlitImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
MakeImageBlit(frame->width, frame->height, extent.width, extent.height),
VK_FILTER_LINEAR);
} else {
cmdbuf.CopyImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
MakeImageCopy(frame->width, frame->height, extent.width, extent.height));
}
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, {},
{}, {}, post_barriers);
cmdbuf.End();
const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
const VkSemaphore render_semaphore = swapchain.CurrentRenderSemaphore();
const std::array wait_semaphores = {present_semaphore, *frame->render_ready};
static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
};
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = nullptr,
.waitSemaphoreCount = 2U,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),
.signalSemaphoreCount = 1U,
.pSignalSemaphores = &render_semaphore,
};
// Submit the image copy/blit to the swapchain
{
std::scoped_lock lock{scheduler.submit_mutex};
switch (const VkResult result =
device.GetGraphicsQueue().Submit(submit_info, *frame->present_done)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
device.ReportLoss();
[[fallthrough]];
default:
vk::Check(result);
break;
}
}
// Present
swapchain.Present(render_semaphore);
}
} // namespace Vulkan

View File

@@ -0,0 +1,83 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <condition_variable>
#include <mutex>
#include <queue>
#include "common/common_types.h"
#include "common/polyfill_thread.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core::Frontend {
class EmuWindow;
} // namespace Core::Frontend
namespace Vulkan {
class Device;
class Scheduler;
class Swapchain;
struct Frame {
u32 width;
u32 height;
bool is_srgb;
vk::Image image;
vk::ImageView image_view;
vk::Framebuffer framebuffer;
MemoryCommit image_commit;
vk::CommandBuffer cmdbuf;
vk::Semaphore render_ready;
vk::Fence present_done;
};
class PresentManager {
public:
PresentManager(Core::Frontend::EmuWindow& render_window, const Device& device,
MemoryAllocator& memory_allocator, Scheduler& scheduler, Swapchain& swapchain);
~PresentManager();
/// Returns the last used presentation frame
Frame* GetRenderFrame();
/// Pushes a frame for presentation
void Present(Frame* frame);
/// Recreates the present frame to match the provided parameters
void RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb,
VkFormat image_view_format, VkRenderPass rd);
/// Waits for the present thread to finish presenting all queued frames.
void WaitPresent();
private:
void PresentThread(std::stop_token token);
void CopyToSwapchain(Frame* frame);
private:
Core::Frontend::EmuWindow& render_window;
const Device& device;
MemoryAllocator& memory_allocator;
Scheduler& scheduler;
Swapchain& swapchain;
vk::CommandPool cmdpool;
std::vector<Frame> frames;
std::queue<Frame*> present_queue;
std::queue<Frame*> free_queue;
std::condition_variable_any frame_cv;
std::condition_variable free_cv;
std::mutex swapchain_mutex;
std::mutex queue_mutex;
std::mutex free_mutex;
std::jthread present_thread;
bool blit_supported;
bool use_present_thread;
std::size_t image_count;
};
} // namespace Vulkan

View File

@@ -66,9 +66,10 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
}
}
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
Scheduler& scheduler_)
: QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
: QueryCacheBase{rasterizer_, cpu_memory_}, device{device_}, scheduler{scheduler_},
query_pools{
QueryPool{device_, scheduler_, QueryType::SamplesPassed},
} {}
@@ -98,8 +99,10 @@ HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> depend
query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
const vk::Device* logical = &cache.GetDevice().GetLogical();
cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
const bool use_precise = Settings::IsGPULevelHigh();
logical->ResetQueryPool(query.first, query.second, 1);
cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
cmdbuf.BeginQuery(query.first, query.second,
use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
});
}
@@ -112,8 +115,10 @@ void HostCounter::EndQuery() {
[query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
}
u64 HostCounter::BlockingQuery() const {
cache.GetScheduler().Wait(tick);
u64 HostCounter::BlockingQuery(bool async) const {
if (!async) {
cache.GetScheduler().Wait(tick);
}
u64 data;
const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
query.first, query.second, 1, sizeof(data), &data, sizeof(data),

View File

@@ -52,7 +52,8 @@ private:
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
Scheduler& scheduler_);
~QueryCache();
@@ -83,7 +84,7 @@ public:
void EndQuery();
private:
u64 BlockingQuery() const override;
u64 BlockingQuery(bool async = false) const override;
QueryCache& cache;
const VideoCore::QueryType type;

View File

@@ -172,7 +172,8 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
query_cache{*this, device, scheduler}, accelerate_dma(buffer_cache, texture_cache, scheduler),
query_cache{*this, cpu_memory_, device, scheduler},
accelerate_dma(buffer_cache, texture_cache, scheduler),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
wfi_event(device.GetLogical().CreateEvent()) {
scheduler.SetQueryCache(query_cache);
@@ -675,7 +676,8 @@ bool RasterizerVulkan::AccelerateConditionalRendering() {
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
Maxwell::ReportSemaphore::Compare cmp;
if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
VideoCommon::CacheType::BufferCache)) {
VideoCommon::CacheType::BufferCache |
VideoCommon::CacheType::QueryCache)) {
return true;
}
return false;

View File

@@ -46,10 +46,11 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
Scheduler::~Scheduler() = default;
void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
u64 Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
// When flushing, we only send data to the worker thread; no waiting is necessary.
SubmitExecution(signal_semaphore, wait_semaphore);
const u64 signal_value = SubmitExecution(signal_semaphore, wait_semaphore);
AllocateNewContext();
return signal_value;
}
void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
@@ -205,7 +206,7 @@ void Scheduler::AllocateWorkerCommandBuffer() {
});
}
void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
EndPendingOperations();
InvalidateState();
@@ -217,6 +218,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
on_submit();
}
std::scoped_lock lock{submit_mutex};
switch (const VkResult result = master_semaphore->SubmitQueue(
cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
case VK_SUCCESS:
@@ -231,6 +233,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
});
chunk->MarkSubmit();
DispatchWork();
return signal_value;
}
void Scheduler::AllocateNewContext() {

View File

@@ -34,7 +34,7 @@ public:
~Scheduler();
/// Sends the current execution context to the GPU.
void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
u64 Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
@@ -106,6 +106,8 @@ public:
return *master_semaphore;
}
std::mutex submit_mutex;
private:
class Command {
public:
@@ -201,7 +203,7 @@ private:
void AllocateWorkerCommandBuffer();
void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
u64 SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
void AllocateNewContext();

View File

@@ -99,18 +99,16 @@ void Swapchain::Create(u32 width_, u32 height_, bool srgb) {
return;
}
device.GetLogical().WaitIdle();
Destroy();
CreateSwapchain(capabilities, srgb);
CreateSemaphores();
CreateImageViews();
resource_ticks.clear();
resource_ticks.resize(image_count);
}
void Swapchain::AcquireNextImage() {
bool Swapchain::AcquireNextImage() {
const VkResult result = device.GetLogical().AcquireNextImageKHR(
*swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
VK_NULL_HANDLE, &image_index);
@@ -127,8 +125,11 @@ void Swapchain::AcquireNextImage() {
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
break;
}
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
return is_suboptimal || is_outdated;
}
void Swapchain::Present(VkSemaphore render_semaphore) {
@@ -143,6 +144,7 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
.pImageIndices = &image_index,
.pResults = nullptr,
};
std::scoped_lock lock{scheduler.submit_mutex};
switch (const VkResult result = present_queue.Present(present_info)) {
case VK_SUCCESS:
break;
@@ -168,7 +170,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)};
const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
surface_format = ChooseSwapSurfaceFormat(formats);
present_mode = ChooseSwapPresentMode(present_modes);
u32 requested_image_count{capabilities.minImageCount + 1};
@@ -193,7 +195,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
.imageColorSpace = surface_format.colorSpace,
.imageExtent = {},
.imageArrayLayers = 1,
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
@@ -241,45 +243,14 @@ void Swapchain::CreateSemaphores() {
present_semaphores.resize(image_count);
std::ranges::generate(present_semaphores,
[this] { return device.GetLogical().CreateSemaphore(); });
}
void Swapchain::CreateImageViews() {
VkImageViewCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = {},
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = image_view_format,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
image_views.resize(image_count);
for (std::size_t i = 0; i < image_count; i++) {
ci.image = images[i];
image_views[i] = device.GetLogical().CreateImageView(ci);
}
render_semaphores.resize(image_count);
std::ranges::generate(render_semaphores,
[this] { return device.GetLogical().CreateSemaphore(); });
}
void Swapchain::Destroy() {
frame_index = 0;
present_semaphores.clear();
framebuffers.clear();
image_views.clear();
swapchain.reset();
}

View File

@@ -27,7 +27,7 @@ public:
void Create(u32 width, u32 height, bool srgb);
/// Acquires the next image in the swapchain, waits as needed.
void AcquireNextImage();
bool AcquireNextImage();
/// Presents the rendered image to the swapchain.
void Present(VkSemaphore render_semaphore);
@@ -52,6 +52,11 @@ public:
return is_suboptimal;
}
/// Returns true when the swapchain format is in the srgb color space
bool IsSrgb() const {
return current_srgb;
}
VkExtent2D GetSize() const {
return extent;
}
@@ -64,22 +69,34 @@ public:
return image_index;
}
std::size_t GetFrameIndex() const {
return frame_index;
}
VkImage GetImageIndex(std::size_t index) const {
return images[index];
}
VkImageView GetImageViewIndex(std::size_t index) const {
return *image_views[index];
VkImage CurrentImage() const {
return images[image_index];
}
VkFormat GetImageViewFormat() const {
return image_view_format;
}
VkFormat GetImageFormat() const {
return surface_format.format;
}
VkSemaphore CurrentPresentSemaphore() const {
return *present_semaphores[frame_index];
}
VkSemaphore CurrentRenderSemaphore() const {
return *render_semaphores[frame_index];
}
u32 GetWidth() const {
return width;
}
@@ -88,6 +105,10 @@ public:
return height;
}
VkExtent2D GetExtent() const {
return extent;
}
private:
void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb);
void CreateSemaphores();
@@ -107,10 +128,9 @@ private:
std::size_t image_count{};
std::vector<VkImage> images;
std::vector<vk::ImageView> image_views;
std::vector<vk::Framebuffer> framebuffers;
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> present_semaphores;
std::vector<vk::Semaphore> render_semaphores;
u32 width;
u32 height;
@@ -121,6 +141,7 @@ private:
VkFormat image_view_format{};
VkExtent2D extent{};
VkPresentModeKHR present_mode{};
VkSurfaceFormatKHR surface_format{};
bool current_srgb{};
bool current_fps_unlocked{};

View File

@@ -1584,8 +1584,9 @@ bool Image::NeedsScaleHelper() const {
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image)
: VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
image_handle{image.Handle()}, samples(ConvertSampleCount(image.info.num_samples)) {
: VideoCommon::ImageViewBase{info, image.info, image_id_, image.gpu_addr},
device{&runtime.device}, image_handle{image.Handle()},
samples(ConvertSampleCount(image.info.num_samples)) {
using Shader::TextureType;
const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
@@ -1631,7 +1632,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
}
vk::ImageView handle = device->GetLogical().CreateImageView(ci);
if (device->HasDebuggingToolAttached()) {
handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
handle.SetObjectNameEXT(VideoCommon::Name(*this, gpu_addr).c_str());
}
image_views[static_cast<size_t>(tex_type)] = std::move(handle);
};
@@ -1672,7 +1673,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
: VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
: VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params)

View File

@@ -265,7 +265,6 @@ private:
VkImage image_handle = VK_NULL_HANDLE;
VkImageView render_target = VK_NULL_HANDLE;
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
GPUVAddr gpu_addr = 0;
u32 buffer_size = 0;
};

View File

@@ -14,13 +14,18 @@ namespace Vulkan {
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
: device{device_}, scheduler{scheduler_} {
payload_start = payload.data();
payload_cursor = payload.data();
}
UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
void UpdateDescriptorQueue::TickFrame() {
payload_cursor = payload.data();
if (++frame_index >= FRAMES_IN_FLIGHT) {
frame_index = 0;
}
payload_start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE;
payload_cursor = payload_start;
}
void UpdateDescriptorQueue::Acquire() {
@@ -28,10 +33,10 @@ void UpdateDescriptorQueue::Acquire() {
// This is the maximum number of entries a single draw call might use.
static constexpr size_t MIN_ENTRIES = 0x400;
if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) {
if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
payload_cursor = payload.data();
payload_cursor = payload_start;
}
upload_start = payload_cursor;
}

View File

@@ -29,6 +29,12 @@ struct DescriptorUpdateEntry {
};
class UpdateDescriptorQueue final {
// This should be plenty for the vast majority of cases. Most desktop platforms only
// provide up to 3 swapchain images.
static constexpr size_t FRAMES_IN_FLIGHT = 5;
static constexpr size_t FRAME_PAYLOAD_SIZE = 0x10000;
static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
public:
explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
~UpdateDescriptorQueue();
@@ -73,9 +79,11 @@ private:
const Device& device;
Scheduler& scheduler;
size_t frame_index{0};
DescriptorUpdateEntry* payload_cursor = nullptr;
DescriptorUpdateEntry* payload_start = nullptr;
const DescriptorUpdateEntry* upload_start = nullptr;
std::array<DescriptorUpdateEntry, 0x10000> payload;
std::array<DescriptorUpdateEntry, PAYLOAD_SIZE> payload;
};
} // namespace Vulkan

View File

@@ -228,14 +228,14 @@ const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu
auto info = std::make_unique<ShaderInfo>();
if (const std::optional<u64> cached_hash{env.Analyze()}) {
info->unique_hash = *cached_hash;
info->size_bytes = env.CachedSize();
info->size_bytes = env.CachedSizeBytes();
} else {
// Slow path, not really hit on commercial games
// Build a control flow graph to get the real shader size
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()};
info->unique_hash = env.CalculateHash();
info->size_bytes = env.ReadSize();
info->size_bytes = env.ReadSizeBytes();
}
const size_t size_bytes{info->size_bytes};
const ShaderInfo* const result{info.get()};

View File

@@ -170,15 +170,19 @@ std::optional<u64> GenericEnvironment::Analyze() {
void GenericEnvironment::SetCachedSize(size_t size_bytes) {
cached_lowest = start_address;
cached_highest = start_address + static_cast<u32>(size_bytes);
code.resize(CachedSize());
code.resize(CachedSizeWords());
gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
}
size_t GenericEnvironment::CachedSize() const noexcept {
return cached_highest - cached_lowest + INST_SIZE;
size_t GenericEnvironment::CachedSizeWords() const noexcept {
return CachedSizeBytes() / INST_SIZE;
}
size_t GenericEnvironment::ReadSize() const noexcept {
size_t GenericEnvironment::CachedSizeBytes() const noexcept {
return static_cast<size_t>(cached_highest) - cached_lowest + INST_SIZE;
}
size_t GenericEnvironment::ReadSizeBytes() const noexcept {
return read_highest - read_lowest + INST_SIZE;
}
@@ -187,7 +191,7 @@ bool GenericEnvironment::CanBeSerialized() const noexcept {
}
u64 GenericEnvironment::CalculateHash() const {
const size_t size{ReadSize()};
const size_t size{ReadSizeBytes()};
const auto data{std::make_unique<char[]>(size)};
gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
return Common::CityHash64(data.get(), size);
@@ -198,7 +202,7 @@ void GenericEnvironment::Dump(u64 hash) {
}
void GenericEnvironment::Serialize(std::ofstream& file) const {
const u64 code_size{static_cast<u64>(CachedSize())};
const u64 code_size{static_cast<u64>(CachedSizeBytes())};
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())};
const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};

View File

@@ -48,9 +48,11 @@ public:
void SetCachedSize(size_t size_bytes);
[[nodiscard]] size_t CachedSize() const noexcept;
[[nodiscard]] size_t CachedSizeWords() const noexcept;
[[nodiscard]] size_t ReadSize() const noexcept;
[[nodiscard]] size_t CachedSizeBytes() const noexcept;
[[nodiscard]] size_t ReadSizeBytes() const noexcept;
[[nodiscard]] bool CanBeSerialized() const noexcept;

View File

@@ -250,10 +250,13 @@ bool IsPixelFormatASTC(PixelFormat format) {
case PixelFormat::ASTC_2D_6X6_UNORM:
case PixelFormat::ASTC_2D_6X6_SRGB:
case PixelFormat::ASTC_2D_10X6_UNORM:
case PixelFormat::ASTC_2D_10X6_SRGB:
case PixelFormat::ASTC_2D_10X5_UNORM:
case PixelFormat::ASTC_2D_10X5_SRGB:
case PixelFormat::ASTC_2D_10X10_UNORM:
case PixelFormat::ASTC_2D_10X10_SRGB:
case PixelFormat::ASTC_2D_12X10_UNORM:
case PixelFormat::ASTC_2D_12X10_SRGB:
case PixelFormat::ASTC_2D_12X12_UNORM:
case PixelFormat::ASTC_2D_12X12_SRGB:
case PixelFormat::ASTC_2D_8X6_UNORM:
@@ -279,11 +282,13 @@ bool IsPixelFormatSRGB(PixelFormat format) {
case PixelFormat::ASTC_2D_8X5_SRGB:
case PixelFormat::ASTC_2D_5X4_SRGB:
case PixelFormat::ASTC_2D_5X5_SRGB:
case PixelFormat::ASTC_2D_10X6_SRGB:
case PixelFormat::ASTC_2D_10X8_SRGB:
case PixelFormat::ASTC_2D_6X6_SRGB:
case PixelFormat::ASTC_2D_10X5_SRGB:
case PixelFormat::ASTC_2D_10X10_SRGB:
case PixelFormat::ASTC_2D_12X12_SRGB:
case PixelFormat::ASTC_2D_12X10_SRGB:
case PixelFormat::ASTC_2D_8X6_SRGB:
case PixelFormat::ASTC_2D_6X5_SRGB:
return true;

View File

@@ -95,10 +95,13 @@ enum class PixelFormat {
ASTC_2D_6X6_UNORM,
ASTC_2D_6X6_SRGB,
ASTC_2D_10X6_UNORM,
ASTC_2D_10X6_SRGB,
ASTC_2D_10X5_UNORM,
ASTC_2D_10X5_SRGB,
ASTC_2D_10X10_UNORM,
ASTC_2D_10X10_SRGB,
ASTC_2D_12X10_UNORM,
ASTC_2D_12X10_SRGB,
ASTC_2D_12X12_UNORM,
ASTC_2D_12X12_SRGB,
ASTC_2D_8X6_UNORM,
@@ -232,10 +235,13 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
6, // ASTC_2D_6X6_UNORM
6, // ASTC_2D_6X6_SRGB
10, // ASTC_2D_10X6_UNORM
10, // ASTC_2D_10X6_SRGB
10, // ASTC_2D_10X5_UNORM
10, // ASTC_2D_10X5_SRGB
10, // ASTC_2D_10X10_UNORM
10, // ASTC_2D_10X10_SRGB
12, // ASTC_2D_12X10_UNORM
12, // ASTC_2D_12X10_SRGB
12, // ASTC_2D_12X12_UNORM
12, // ASTC_2D_12X12_SRGB
8, // ASTC_2D_8X6_UNORM
@@ -338,10 +344,13 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
6, // ASTC_2D_6X6_UNORM
6, // ASTC_2D_6X6_SRGB
6, // ASTC_2D_10X6_UNORM
6, // ASTC_2D_10X6_SRGB
5, // ASTC_2D_10X5_UNORM
5, // ASTC_2D_10X5_SRGB
10, // ASTC_2D_10X10_UNORM
10, // ASTC_2D_10X10_SRGB
10, // ASTC_2D_12X10_UNORM
10, // ASTC_2D_12X10_SRGB
12, // ASTC_2D_12X12_UNORM
12, // ASTC_2D_12X12_SRGB
6, // ASTC_2D_8X6_UNORM
@@ -444,10 +453,13 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
128, // ASTC_2D_6X6_UNORM
128, // ASTC_2D_6X6_SRGB
128, // ASTC_2D_10X6_UNORM
128, // ASTC_2D_10X6_SRGB
128, // ASTC_2D_10X5_UNORM
128, // ASTC_2D_10X5_SRGB
128, // ASTC_2D_10X10_UNORM
128, // ASTC_2D_10X10_SRGB
128, // ASTC_2D_12X10_UNORM
128, // ASTC_2D_12X10_SRGB
128, // ASTC_2D_12X12_UNORM
128, // ASTC_2D_12X12_SRGB
128, // ASTC_2D_8X6_UNORM

View File

@@ -210,6 +210,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::ASTC_2D_6X6_SRGB;
case Hash(TextureFormat::ASTC_2D_10X6, UNORM, LINEAR):
return PixelFormat::ASTC_2D_10X6_UNORM;
case Hash(TextureFormat::ASTC_2D_10X6, UNORM, SRGB):
return PixelFormat::ASTC_2D_10X6_SRGB;
case Hash(TextureFormat::ASTC_2D_10X5, UNORM, LINEAR):
return PixelFormat::ASTC_2D_10X5_UNORM;
case Hash(TextureFormat::ASTC_2D_10X5, UNORM, SRGB):
@@ -218,6 +220,10 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::ASTC_2D_10X10_UNORM;
case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB):
return PixelFormat::ASTC_2D_10X10_SRGB;
case Hash(TextureFormat::ASTC_2D_12X10, UNORM, LINEAR):
return PixelFormat::ASTC_2D_12X10_UNORM;
case Hash(TextureFormat::ASTC_2D_12X10, UNORM, SRGB):
return PixelFormat::ASTC_2D_12X10_SRGB;
case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR):
return PixelFormat::ASTC_2D_12X12_UNORM;
case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB):

View File

@@ -46,7 +46,7 @@ std::string Name(const ImageBase& image) {
return "Invalid";
}
std::string Name(const ImageViewBase& image_view) {
std::string Name(const ImageViewBase& image_view, GPUVAddr addr) {
const u32 width = image_view.size.width;
const u32 height = image_view.size.height;
const u32 depth = image_view.size.depth;
@@ -56,23 +56,25 @@ std::string Name(const ImageViewBase& image_view) {
const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
switch (image_view.type) {
case ImageViewType::e1D:
return fmt::format("ImageView 1D {}{}", width, level);
return fmt::format("ImageView 1D 0x{:X} {}{}", addr, width, level);
case ImageViewType::e2D:
return fmt::format("ImageView 2D {}x{}{}", width, height, level);
return fmt::format("ImageView 2D 0x{:X} {}x{}{}", addr, width, height, level);
case ImageViewType::Cube:
return fmt::format("ImageView Cube {}x{}{}", width, height, level);
return fmt::format("ImageView Cube 0x{:X} {}x{}{}", addr, width, height, level);
case ImageViewType::e3D:
return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level);
return fmt::format("ImageView 3D 0x{:X} {}x{}x{}{}", addr, width, height, depth, level);
case ImageViewType::e1DArray:
return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers);
return fmt::format("ImageView 1DArray 0x{:X} {}{}|{}", addr, width, level, num_layers);
case ImageViewType::e2DArray:
return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers);
return fmt::format("ImageView 2DArray 0x{:X} {}x{}{}|{}", addr, width, height, level,
num_layers);
case ImageViewType::CubeArray:
return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers);
return fmt::format("ImageView CubeArray 0x{:X} {}x{}{}|{}", addr, width, height, level,
num_layers);
case ImageViewType::Rect:
return fmt::format("ImageView Rect {}x{}{}", width, height, level);
return fmt::format("ImageView Rect 0x{:X} {}x{}{}", addr, width, height, level);
case ImageViewType::Buffer:
return fmt::format("BufferView {}", width);
return fmt::format("BufferView 0x{:X} {}", addr, width);
}
return "Invalid";
}

View File

@@ -179,6 +179,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
return "ASTC_2D_6X6_SRGB";
case PixelFormat::ASTC_2D_10X6_UNORM:
return "ASTC_2D_10X6_UNORM";
case PixelFormat::ASTC_2D_10X6_SRGB:
return "ASTC_2D_10X6_SRGB";
case PixelFormat::ASTC_2D_10X5_UNORM:
return "ASTC_2D_10X5_UNORM";
case PixelFormat::ASTC_2D_10X5_SRGB:
@@ -187,6 +189,10 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
return "ASTC_2D_10X10_UNORM";
case PixelFormat::ASTC_2D_10X10_SRGB:
return "ASTC_2D_10X10_SRGB";
case PixelFormat::ASTC_2D_12X10_UNORM:
return "ASTC_2D_12X10_UNORM";
case PixelFormat::ASTC_2D_12X10_SRGB:
return "ASTC_2D_12X10_SRGB";
case PixelFormat::ASTC_2D_12X12_UNORM:
return "ASTC_2D_12X12_UNORM";
case PixelFormat::ASTC_2D_12X12_SRGB:
@@ -268,7 +274,7 @@ struct RenderTargets;
[[nodiscard]] std::string Name(const ImageBase& image);
[[nodiscard]] std::string Name(const ImageViewBase& image_view);
[[nodiscard]] std::string Name(const ImageViewBase& image_view, GPUVAddr addr);
[[nodiscard]] std::string Name(const RenderTargets& render_targets);

View File

@@ -16,8 +16,8 @@
namespace VideoCommon {
ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
ImageId image_id_)
: image_id{image_id_}, format{info.format}, type{info.type}, range{info.range},
ImageId image_id_, GPUVAddr addr)
: image_id{image_id_}, gpu_addr{addr}, format{info.format}, type{info.type}, range{info.range},
size{
.width = std::max(image_info.size.width >> range.base.level, 1u),
.height = std::max(image_info.size.height >> range.base.level, 1u),
@@ -35,8 +35,8 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i
}
}
ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info)
: image_id{NULL_IMAGE_ID}, format{info.format}, type{ImageViewType::Buffer},
ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info, GPUVAddr addr)
: image_id{NULL_IMAGE_ID}, gpu_addr{addr}, format{info.format}, type{ImageViewType::Buffer},
size{
.width = info.size.width,
.height = 1,

View File

@@ -24,9 +24,9 @@ enum class ImageViewFlagBits : u16 {
DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)
struct ImageViewBase {
explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
ImageId image_id);
explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info);
explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, ImageId image_id,
GPUVAddr addr);
explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info, GPUVAddr addr);
explicit ImageViewBase(const NullImageViewParams&);
[[nodiscard]] bool IsBuffer() const noexcept {
@@ -34,6 +34,7 @@ struct ImageViewBase {
}
ImageId image_id{};
GPUVAddr gpu_addr = 0;
PixelFormat format{};
ImageViewType type{};
SubresourceRange range;

View File

@@ -888,7 +888,7 @@ void TextureCache<P>::DownloadImageIntoBuffer(typename TextureCache<P>::Image* i
buffer,
download_map.buffer,
};
std::array buffer_offsets{
std::array<u64, 2> buffer_offsets{
buffer_offset,
download_map.offset,
};

View File

@@ -10,6 +10,7 @@
#include <vector>
#include "common/common_types.h"
#include "common/settings.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
// Define all features which may be used by the implementation here.
@@ -510,7 +511,7 @@ public:
/// Returns true when a known debugging tool is attached.
bool HasDebuggingToolAttached() const {
return has_renderdoc || has_nsight_graphics;
return has_renderdoc || has_nsight_graphics || Settings::values.renderer_debug.GetValue();
}
/// Returns true when the device does not properly support cube compatibility.

View File

@@ -497,7 +497,7 @@ void Config::ReadCoreValues() {
qt_config->beginGroup(QStringLiteral("Core"));
ReadGlobalSetting(Settings::values.use_multi_core);
ReadGlobalSetting(Settings::values.use_extended_memory_layout);
ReadGlobalSetting(Settings::values.use_unsafe_extended_memory_layout);
qt_config->endGroup();
}
@@ -692,6 +692,7 @@ void Config::ReadRendererValues() {
qt_config->beginGroup(QStringLiteral("Renderer"));
ReadGlobalSetting(Settings::values.renderer_backend);
ReadGlobalSetting(Settings::values.async_presentation);
ReadGlobalSetting(Settings::values.renderer_force_max_clock);
ReadGlobalSetting(Settings::values.vulkan_device);
ReadGlobalSetting(Settings::values.fullscreen_mode);
@@ -712,7 +713,6 @@ void Config::ReadRendererValues() {
ReadGlobalSetting(Settings::values.shader_backend);
ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
ReadGlobalSetting(Settings::values.use_fast_gpu_time);
ReadGlobalSetting(Settings::values.use_pessimistic_flushes);
ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache);
ReadGlobalSetting(Settings::values.bg_red);
ReadGlobalSetting(Settings::values.bg_green);
@@ -1161,7 +1161,7 @@ void Config::SaveCoreValues() {
qt_config->beginGroup(QStringLiteral("Core"));
WriteGlobalSetting(Settings::values.use_multi_core);
WriteGlobalSetting(Settings::values.use_extended_memory_layout);
WriteGlobalSetting(Settings::values.use_unsafe_extended_memory_layout);
qt_config->endGroup();
}
@@ -1313,6 +1313,7 @@ void Config::SaveRendererValues() {
static_cast<u32>(Settings::values.renderer_backend.GetValue(global)),
static_cast<u32>(Settings::values.renderer_backend.GetDefault()),
Settings::values.renderer_backend.UsingGlobal());
WriteGlobalSetting(Settings::values.async_presentation);
WriteGlobalSetting(Settings::values.renderer_force_max_clock);
WriteGlobalSetting(Settings::values.vulkan_device);
WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()),
@@ -1357,7 +1358,6 @@ void Config::SaveRendererValues() {
Settings::values.shader_backend.UsingGlobal());
WriteGlobalSetting(Settings::values.use_asynchronous_shaders);
WriteGlobalSetting(Settings::values.use_fast_gpu_time);
WriteGlobalSetting(Settings::values.use_pessimistic_flushes);
WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache);
WriteGlobalSetting(Settings::values.bg_red);
WriteGlobalSetting(Settings::values.bg_green);

View File

@@ -35,9 +35,6 @@ void ConfigureGeneral::SetConfiguration() {
ui->use_multi_core->setEnabled(runtime_lock);
ui->use_multi_core->setChecked(Settings::values.use_multi_core.GetValue());
ui->use_extended_memory_layout->setEnabled(runtime_lock);
ui->use_extended_memory_layout->setChecked(
Settings::values.use_extended_memory_layout.GetValue());
ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing.GetValue());
ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot.GetValue());
@@ -79,9 +76,6 @@ void ConfigureGeneral::ResetDefaults() {
void ConfigureGeneral::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_multi_core, ui->use_multi_core,
use_multi_core);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_extended_memory_layout,
ui->use_extended_memory_layout,
use_extended_memory_layout);
if (Settings::IsConfiguringGlobal()) {
UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked();
@@ -141,9 +135,6 @@ void ConfigureGeneral::SetupPerGameUI() {
Settings::values.use_speed_limit, use_speed_limit);
ConfigurationShared::SetColoredTristate(ui->use_multi_core, Settings::values.use_multi_core,
use_multi_core);
ConfigurationShared::SetColoredTristate(ui->use_extended_memory_layout,
Settings::values.use_extended_memory_layout,
use_extended_memory_layout);
connect(ui->toggle_speed_limit, &QCheckBox::clicked, ui->speed_limit, [this]() {
ui->speed_limit->setEnabled(ui->toggle_speed_limit->isChecked() &&

View File

@@ -47,7 +47,6 @@ private:
ConfigurationShared::CheckState use_speed_limit;
ConfigurationShared::CheckState use_multi_core;
ConfigurationShared::CheckState use_extended_memory_layout;
const Core::System& system;
};

View File

@@ -61,13 +61,6 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="use_extended_memory_layout">
<property name="text">
<string>Extended memory layout (8GB DRAM)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="toggle_check_exit">
<property name="text">

View File

@@ -22,17 +22,18 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
void ConfigureGraphicsAdvanced::SetConfiguration() {
const bool runtime_lock = !system.IsPoweredOn();
ui->use_vsync->setEnabled(runtime_lock);
ui->async_present->setEnabled(runtime_lock);
ui->renderer_force_max_clock->setEnabled(runtime_lock);
ui->async_astc->setEnabled(runtime_lock);
ui->use_asynchronous_shaders->setEnabled(runtime_lock);
ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
ui->async_present->setChecked(Settings::values.async_presentation.GetValue());
ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue());
ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
ui->async_astc->setChecked(Settings::values.async_astc.GetValue());
ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
ui->use_pessimistic_flushes->setChecked(Settings::values.use_pessimistic_flushes.GetValue());
ui->use_vulkan_driver_pipeline_cache->setChecked(
Settings::values.use_vulkan_driver_pipeline_cache.GetValue());
@@ -54,6 +55,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
void ConfigureGraphicsAdvanced::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_presentation,
ui->async_present, async_present);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock,
ui->renderer_force_max_clock,
renderer_force_max_clock);
@@ -67,8 +70,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
use_asynchronous_shaders);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time,
ui->use_fast_gpu_time, use_fast_gpu_time);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_pessimistic_flushes,
ui->use_pessimistic_flushes, use_pessimistic_flushes);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vulkan_driver_pipeline_cache,
ui->use_vulkan_driver_pipeline_cache,
use_vulkan_driver_pipeline_cache);
@@ -90,6 +91,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
// Disable if not global (only happens during game)
if (Settings::IsConfiguringGlobal()) {
ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal());
ui->async_present->setEnabled(Settings::values.async_presentation.UsingGlobal());
ui->renderer_force_max_clock->setEnabled(
Settings::values.renderer_force_max_clock.UsingGlobal());
ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
@@ -97,8 +99,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
ui->use_asynchronous_shaders->setEnabled(
Settings::values.use_asynchronous_shaders.UsingGlobal());
ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
ui->use_pessimistic_flushes->setEnabled(
Settings::values.use_pessimistic_flushes.UsingGlobal());
ui->use_vulkan_driver_pipeline_cache->setEnabled(
Settings::values.use_vulkan_driver_pipeline_cache.UsingGlobal());
ui->anisotropic_filtering_combobox->setEnabled(
@@ -107,6 +107,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
return;
}
ConfigurationShared::SetColoredTristate(ui->async_present, Settings::values.async_presentation,
async_present);
ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock,
Settings::values.renderer_force_max_clock,
renderer_force_max_clock);
@@ -118,9 +120,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
use_asynchronous_shaders);
ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time,
Settings::values.use_fast_gpu_time, use_fast_gpu_time);
ConfigurationShared::SetColoredTristate(ui->use_pessimistic_flushes,
Settings::values.use_pessimistic_flushes,
use_pessimistic_flushes);
ConfigurationShared::SetColoredTristate(ui->use_vulkan_driver_pipeline_cache,
Settings::values.use_vulkan_driver_pipeline_cache,
use_vulkan_driver_pipeline_cache);

View File

@@ -36,12 +36,12 @@ private:
std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
ConfigurationShared::CheckState async_present;
ConfigurationShared::CheckState renderer_force_max_clock;
ConfigurationShared::CheckState use_vsync;
ConfigurationShared::CheckState async_astc;
ConfigurationShared::CheckState use_asynchronous_shaders;
ConfigurationShared::CheckState use_fast_gpu_time;
ConfigurationShared::CheckState use_pessimistic_flushes;
ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache;
const Core::System& system;

View File

@@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>404</width>
<height>321</height>
<height>376</height>
</rect>
</property>
<property name="windowTitle">
@@ -69,6 +69,13 @@
</layout>
</widget>
</item>
<item>
<widget class="QCheckBox" name="async_present">
<property name="text">
<string>Enable asynchronous presentation (Vulkan only)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="renderer_force_max_clock">
<property name="toolTip">
@@ -112,27 +119,17 @@
<item>
<widget class="QCheckBox" name="use_fast_gpu_time">
<property name="toolTip">
<string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>
<string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>
</property>
<property name="text">
<string>Use Fast GPU Time (Hack)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="use_pessimistic_flushes">
<property name="toolTip">
<string>Enables pessimistic buffer flushes. This option will force unmodified buffers to be flushed, which can cost performance.</string>
</property>
<property name="text">
<string>Use pessimistic buffer flushes (Hack)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache">
<property name="toolTip">
<string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string>
<string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string>
</property>
<property name="text">
<string>Use Vulkan pipeline cache</string>

View File

@@ -111,6 +111,9 @@ void ConfigureSystem::SetConfiguration() {
ui->custom_rtc_edit->setDateTime(QDateTime::fromSecsSinceEpoch(rtc_time));
ui->device_name_edit->setText(
QString::fromUtf8(Settings::values.device_name.GetValue().c_str()));
ui->use_unsafe_extended_memory_layout->setEnabled(enabled);
ui->use_unsafe_extended_memory_layout->setChecked(
Settings::values.use_unsafe_extended_memory_layout.GetValue());
if (Settings::IsConfiguringGlobal()) {
ui->combo_language->setCurrentIndex(Settings::values.language_index.GetValue());
@@ -160,6 +163,9 @@ void ConfigureSystem::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.region_index, ui->combo_region);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.time_zone_index,
ui->combo_time_zone);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_unsafe_extended_memory_layout,
ui->use_unsafe_extended_memory_layout,
use_unsafe_extended_memory_layout);
if (Settings::IsConfiguringGlobal()) {
// Guard if during game and set to game-specific value
@@ -215,6 +221,10 @@ void ConfigureSystem::SetupPerGameUI() {
Settings::values.rng_seed.GetValue().has_value(),
Settings::values.rng_seed.GetValue(true).has_value(), use_rng_seed);
ConfigurationShared::SetColoredTristate(ui->use_unsafe_extended_memory_layout,
Settings::values.use_unsafe_extended_memory_layout,
use_unsafe_extended_memory_layout);
ui->custom_rtc_checkbox->setVisible(false);
ui->custom_rtc_edit->setVisible(false);
}

View File

@@ -41,6 +41,7 @@ private:
bool enabled = false;
ConfigurationShared::CheckState use_rng_seed;
ConfigurationShared::CheckState use_unsafe_extended_memory_layout;
Core::System& system;
};

View File

@@ -478,6 +478,13 @@
</property>
</widget>
</item>
<item row="7" column="0">
<widget class="QCheckBox" name="use_unsafe_extended_memory_layout">
<property name="text">
<string>Unsafe extended memory layout (8GB DRAM)</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>

View File

@@ -27,6 +27,7 @@
#include "configuration/configure_input.h"
#include "configuration/configure_per_game.h"
#include "configuration/configure_tas.h"
#include "core/file_sys/romfs_factory.h"
#include "core/file_sys/vfs.h"
#include "core/file_sys/vfs_real.h"
#include "core/frontend/applets/cabinet.h"
@@ -4171,6 +4172,8 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
}
Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance();
bool all_keys_present{true};
if (keys.BaseDeriveNecessary()) {
Core::Crypto::PartitionDataManager pdm{vfs->OpenDirectory("", FileSys::Mode::Read)};
@@ -4195,6 +4198,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
errors += tr(" - Missing PRODINFO");
}
if (!errors.isEmpty()) {
all_keys_present = false;
QMessageBox::warning(
this, tr("Derivation Components Missing"),
tr("Encryption keys are missing. "
@@ -4222,11 +4226,40 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
system->GetFileSystemController().CreateFactories(*vfs);
if (all_keys_present && !this->CheckSystemArchiveDecryption()) {
LOG_WARNING(Frontend, "Mii model decryption failed");
QMessageBox::warning(
this, tr("System Archive Decryption Failed"),
tr("Encryption keys failed to decrypt firmware. "
"<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the yuzu "
"quickstart guide</a> to get all your keys, firmware and "
"games."));
}
if (behavior == ReinitializeKeyBehavior::Warning) {
game_list->PopulateAsync(UISettings::values.game_dirs);
}
}
bool GMainWindow::CheckSystemArchiveDecryption() {
constexpr u64 MiiModelId = 0x0100000000000802;
auto bis_system = system->GetFileSystemController().GetSystemNANDContents();
if (!bis_system) {
// Not having system BIS files is not an error.
return true;
}
auto mii_nca = bis_system->GetEntry(MiiModelId, FileSys::ContentRecordType::Data);
if (!mii_nca) {
// Not having the Mii model is not an error.
return true;
}
// Return whether we are able to decrypt the RomFS of the Mii model.
return mii_nca->GetRomFS().get() != nullptr;
}
std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed,
u64 program_id) {
const auto dlc_entries =

View File

@@ -392,6 +392,7 @@ private:
void LoadTranslation();
void OpenPerGameConfiguration(u64 title_id, const std::string& file_name);
bool CheckDarkMode();
bool CheckSystemArchiveDecryption();
QString GetTasStateDescription() const;
bool CreateShortcut(const std::string& shortcut_path, const std::string& title,

View File

@@ -274,7 +274,7 @@ void Config::ReadValues() {
// Core
ReadSetting("Core", Settings::values.use_multi_core);
ReadSetting("Core", Settings::values.use_extended_memory_layout);
ReadSetting("Core", Settings::values.use_unsafe_extended_memory_layout);
// Cpu
ReadSetting("Cpu", Settings::values.cpu_accuracy);
@@ -300,6 +300,7 @@ void Config::ReadValues() {
// Renderer
ReadSetting("Renderer", Settings::values.renderer_backend);
ReadSetting("Renderer", Settings::values.async_presentation);
ReadSetting("Renderer", Settings::values.renderer_force_max_clock);
ReadSetting("Renderer", Settings::values.renderer_debug);
ReadSetting("Renderer", Settings::values.renderer_shader_feedback);
@@ -326,7 +327,6 @@ void Config::ReadValues() {
ReadSetting("Renderer", Settings::values.accelerate_astc);
ReadSetting("Renderer", Settings::values.async_astc);
ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
ReadSetting("Renderer", Settings::values.use_pessimistic_flushes);
ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache);
ReadSetting("Renderer", Settings::values.bg_red);

View File

@@ -163,9 +163,9 @@ keyboard_enabled =
# 0: Disabled, 1 (default): Enabled
use_multi_core =
# Enable extended guest system memory layout (8GB DRAM)
# Enable unsafe extended guest system memory layout (8GB DRAM)
# 0 (default): Disabled, 1: Enabled
use_extended_memory_layout =
use_unsafe_extended_memory_layout =
[Cpu]
# Adjusts various optimizations.
@@ -264,6 +264,10 @@ cpuopt_unsafe_ignore_global_monitor =
# 0: OpenGL, 1 (default): Vulkan
backend =
# Whether to enable asynchronous presentation (Vulkan only)
# 0 (default): Off, 1: On
async_presentation =
# Enable graphics API debugging mode.
# 0 (default): Disabled, 1: Enabled
debug =
@@ -370,10 +374,6 @@ use_asynchronous_gpu_emulation =
# 0: Off, 1 (default): On
use_fast_gpu_time =
# Force unmodified buffers to be flushed, which can cost performance.
# 0: Off (default), 1: On
use_pessimistic_flushes =
# Whether to use garbage collection or not for GPU caches.
# 0 (default): Off, 1: On
use_caches_gc =

View File

@@ -49,7 +49,7 @@
"overrides": [
{
"name": "catch2",
"version": "3.0.1"
"version": "3.3.1"
},
{
"name": "fmt",