Compare commits

..

15 Commits

Author SHA1 Message Date
Liam
7ec66db22c qt: warn on inoperable keys 2023-04-30 23:47:42 -04:00
bunnei
689f9a75a7 Merge pull request #10110 from Morph1984/intel-disable-compute
vk_pipeline_cache: Skip compute pipelines on Intel proprietary drivers
2023-04-29 23:02:45 -07:00
bunnei
fe57f39676 Merge pull request #10082 from FernandoS27/the-testers-really-love-chocolate
Refactor Accelerate DMA and do downloads through TC.
2023-04-29 11:46:01 -07:00
Fernando Sahmkow
4bc5469f52 Texture Cache: Release staging buffers on tick frame 2023-04-29 15:31:38 +02:00
Fernando Sahmkow
58d1c7c77a Address Feedback & Clang Format 2023-04-29 00:18:21 +02:00
Fernando Sahmkow
56c9730a16 Maxwell3D: only update parameters on High 2023-04-29 00:18:21 +02:00
Fernando Sahmkow
e3a2ca96bd Accelerate DMA: Use texture cache async downloads to perform the copies
to host.

WIP
2023-04-29 00:18:21 +02:00
Fernando Sahmkow
3fbee093b2 TextureCache: refactor DMA downloads to allow multiple buffers. 2023-04-29 00:18:21 +02:00
Morph
cb092af3f0 vk_pipeline_cache: Skip compute pipelines on Intel proprietary drivers
Intel's SPIR-V shader compiler is broken. For now, skip compiling any compute pipelines until they fix this issue.
This is not a perfect workaround, as there is a small subset of non-compute pipelines that still cause it to crash, but this should cover the majority of crashes.
It is unfortunate that even with a test case reported 6 months ago the issue has not been fixed in favor of fixing "the most popular games and apps".
Intel, you can do better than this.
2023-04-28 17:59:36 -04:00
Fernando S
9bf19b04f6 Merge pull request #10051 from liamwhite/surface-capabilities
vulkan: pick alpha composite flags based on available values
2023-04-24 12:37:13 +02:00
Fernando S
47cd0586ee Merge pull request #10056 from vonchenplus/audout_u
core: audio: return result when audio_out initialize failed
2023-04-24 12:36:52 +02:00
Fernando S
2311fa7c84 Merge pull request #10069 from liamwhite/log
maxwell_3d: fix out of bounds array access in size estimation
2023-04-24 12:36:24 +02:00
Liam
eb7c2314f6 maxwell_3d: fix out of bounds array access in size estimation 2023-04-22 10:35:26 -04:00
FengChen
55a33342cc core: audio: return result when audio_out initialize failed 2023-04-16 12:31:54 +08:00
Liam
e37e1d24f9 vulkan: pick alpha composite flags based on available values 2023-04-13 16:38:20 -04:00
26 changed files with 361 additions and 378 deletions

View File

@@ -49,12 +49,6 @@ public:
};
// clang-format on
RegisterHandlers(functions);
if (impl->GetSystem()
.Initialize(device_name, in_params, handle, applet_resource_user_id)
.IsError()) {
LOG_ERROR(Service_Audio, "Failed to initialize the AudioOut System!");
}
}
~IAudioOut() override {
@@ -287,6 +281,14 @@ void AudOutU::OpenAudioOut(HLERequestContext& ctx) {
auto audio_out = std::make_shared<IAudioOut>(system, *impl, new_session_id, device_name,
in_params, handle, applet_resource_user_id);
result = audio_out->GetImpl()->GetSystem().Initialize(device_name, in_params, handle,
applet_resource_user_id);
if (result.IsError()) {
LOG_ERROR(Service_Audio, "Failed to initialize the AudioOut System!");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(result);
return;
}
impl->sessions[new_session_id] = audio_out->GetImpl();
impl->applet_resource_user_ids[new_session_id] = applet_resource_user_id;

View File

@@ -462,7 +462,7 @@ struct Memory::Impl {
}
if (Settings::IsFastmemEnabled()) {
const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
}

View File

@@ -4,6 +4,7 @@
#include <cstring>
#include <optional>
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/core.h"
@@ -222,6 +223,9 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
}
void Maxwell3D::RefreshParametersImpl() {
if (!Settings::IsGPULevelHigh()) {
return;
}
size_t current_index = 0;
for (auto& segment : macro_segments) {
if (segment.first == 0) {
@@ -259,12 +263,13 @@ u32 Maxwell3D::GetMaxCurrentVertices() {
size_t Maxwell3D::EstimateIndexBufferSize() {
GPUVAddr start_address = regs.index_buffer.StartAddress();
GPUVAddr end_address = regs.index_buffer.EndAddress();
static constexpr std::array<size_t, 4> max_sizes = {
std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(),
std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
static constexpr std::array<size_t, 3> max_sizes = {std::numeric_limits<u8>::max(),
std::numeric_limits<u16>::max(),
std::numeric_limits<u32>::max()};
const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
const size_t log2_byte_size = Common::Log2Ceil64(byte_size);
return std::min<size_t>(
memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) /
memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) /
byte_size,
static_cast<size_t>(end_address - start_address));
}

View File

@@ -4,20 +4,13 @@
#pragma once
#include <algorithm>
#include <condition_variable>
#include <cstring>
#include <deque>
#include <functional>
#include <memory>
#include <mutex>
#include <thread>
#include <queue>
#include "common/common_types.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "common/thread.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/gpu.h"
#include "video_core/host1x/host1x.h"
@@ -30,26 +23,15 @@ class FenceBase {
public:
explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {}
bool IsStubbed() const {
return is_stubbed;
}
protected:
bool is_stubbed;
};
template <typename Traits>
template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
class FenceManager {
using TFence = typename Traits::FenceType;
using TTextureCache = typename Traits::TextureCacheType;
using TBufferCache = typename Traits::BufferCacheType;
using TQueryCache = typename Traits::QueryCacheType;
static constexpr bool can_async_check = Traits::HAS_ASYNC_CHECK;
public:
/// Notify the fence manager about a new frame
void TickFrame() {
std::unique_lock lock(ring_guard);
delayed_destruction_ring.Tick();
}
@@ -64,33 +46,17 @@ public:
}
void SignalFence(std::function<void()>&& func) {
rasterizer.InvalidateGPUCache();
bool delay_fence = Settings::IsGPULevelHigh();
if constexpr (!can_async_check) {
TryReleasePendingFences<false>();
}
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
CommitAsyncFlushes();
uncommitted_operations.emplace_back(std::move(func));
CommitOperations();
TFence new_fence = CreateFence(!should_flush);
if constexpr (can_async_check) {
guard.lock();
}
if (delay_fence) {
uncommitted_operations.emplace_back(std::move(func));
}
pending_operations.emplace_back(std::move(uncommitted_operations));
fences.push(new_fence);
QueueFence(new_fence);
if (!delay_fence) {
func();
}
fences.push(std::move(new_fence));
if (should_flush) {
rasterizer.FlushCommands();
}
if constexpr (can_async_check) {
guard.unlock();
cv.notify_all();
}
}
void SignalSyncPoint(u32 value) {
@@ -100,30 +66,29 @@ public:
}
void WaitPendingFences() {
if constexpr (!can_async_check) {
TryReleasePendingFences<true>();
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait()) {
WaitFence(current_fence);
}
PopAsyncFlushes();
auto operations = std::move(pending_operations.front());
pending_operations.pop_front();
for (auto& operation : operations) {
operation();
}
PopFence();
}
}
protected:
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
TTextureCache& texture_cache_, TBufferCache& buffer_cache_,
TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
TQueryCache& query_cache_)
: rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()},
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {
if constexpr (can_async_check) {
fence_thread =
std::jthread([this](std::stop_token token) { ReleaseThreadFunc(token); });
}
}
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
virtual ~FenceManager() {
if constexpr (can_async_check) {
fence_thread.request_stop();
cv.notify_all();
fence_thread.join();
}
}
virtual ~FenceManager() = default;
/// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is
/// true
@@ -139,20 +104,15 @@ protected:
Tegra::GPU& gpu;
Tegra::Host1x::SyncpointManager& syncpoint_manager;
TTextureCache& texture_cache;
TBufferCache& buffer_cache;
TTBufferCache& buffer_cache;
TQueryCache& query_cache;
private:
template <bool force_wait>
void TryReleasePendingFences() {
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait() && !IsFenceSignaled(current_fence)) {
if constexpr (force_wait) {
WaitFence(current_fence);
} else {
return;
}
return;
}
PopAsyncFlushes();
auto operations = std::move(pending_operations.front());
@@ -160,49 +120,7 @@ private:
for (auto& operation : operations) {
operation();
}
{
std::unique_lock lock(ring_guard);
delayed_destruction_ring.Push(std::move(current_fence));
}
fences.pop();
}
}
void ReleaseThreadFunc(std::stop_token stop_token) {
std::string name = "GPUFencingThread";
MicroProfileOnThreadCreate(name.c_str());
// Cleanup
SCOPE_EXIT({ MicroProfileOnThreadExit(); });
Common::SetCurrentThreadName(name.c_str());
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
TFence current_fence;
std::deque<std::function<void()>> current_operations;
while (!stop_token.stop_requested()) {
{
std::unique_lock lock(guard);
cv.wait(lock, [&] { return stop_token.stop_requested() || !fences.empty(); });
if (stop_token.stop_requested()) [[unlikely]] {
return;
}
current_fence = std::move(fences.front());
current_operations = std::move(pending_operations.front());
fences.pop();
pending_operations.pop_front();
}
if (!current_fence->IsStubbed()) {
WaitFence(current_fence);
}
PopAsyncFlushes();
for (auto& operation : current_operations) {
operation();
}
{
std::unique_lock lock(ring_guard);
delayed_destruction_ring.Push(std::move(current_fence));
}
PopFence();
}
}
@@ -236,16 +154,19 @@ private:
query_cache.CommitAsyncFlushes();
}
/// Retires the fence at the front of the queue: it is pushed into the delayed destruction ring
/// and then removed from `fences`.
/// `fences.front()` is read unconditionally, so the queue must not be empty when this is called.
void PopFence() {
// The front element is moved from before it is popped.
delayed_destruction_ring.Push(std::move(fences.front()));
fences.pop();
}
/// Appends the currently accumulated `uncommitted_operations` as a single batch at the back of
/// `pending_operations`. The batch is moved from rather than copied.
void CommitOperations() {
pending_operations.emplace_back(std::move(uncommitted_operations));
}
std::queue<TFence> fences;
std::deque<std::function<void()>> uncommitted_operations;
std::deque<std::deque<std::function<void()>>> pending_operations;
std::mutex guard;
std::mutex ring_guard;
std::condition_variable cv;
std::jthread fence_thread;
DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
};

View File

@@ -170,7 +170,6 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind,
bool is_big_pages) {
std::unique_lock<std::mutex> lock(guard);
if (is_big_pages) [[likely]] {
return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind);
}
@@ -178,7 +177,6 @@ GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
}
GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
std::unique_lock<std::mutex> lock(guard);
if (is_big_pages) [[likely]] {
return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size, PTEKind::INVALID);
}
@@ -189,7 +187,6 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
if (size == 0) {
return;
}
std::unique_lock<std::mutex> lock(guard);
GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash);
for (const auto& [map_addr, map_size] : page_stash) {
@@ -556,7 +553,6 @@ size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const {
}
size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
std::unique_lock<std::mutex> lock(guard);
return kind_map.GetContinuousSizeFrom(gpu_addr);
}
@@ -749,10 +745,10 @@ void MemoryManager::FlushCaching() {
return;
}
accumulator->Callback([this](GPUVAddr addr, size_t size) {
GetSubmappedRangeImpl<false>(addr, size, page_stash2);
GetSubmappedRangeImpl<false>(addr, size, page_stash);
});
rasterizer->InnerInvalidation(page_stash2);
page_stash2.clear();
rasterizer->InnerInvalidation(page_stash);
page_stash.clear();
accumulator->Clear();
}

View File

@@ -5,7 +5,6 @@
#include <atomic>
#include <map>
#include <mutex>
#include <optional>
#include <vector>
@@ -216,9 +215,6 @@ private:
std::vector<u64> big_page_continuous;
std::vector<std::pair<VAddr, std::size_t>> page_stash{};
std::vector<std::pair<VAddr, std::size_t>> page_stash2{};
mutable std::mutex guard;
static constexpr size_t continuous_bits = 64;

View File

@@ -6,7 +6,6 @@
#include <algorithm>
#include <array>
#include <cstring>
#include <functional>
#include <iterator>
#include <list>
#include <memory>
@@ -18,19 +17,13 @@
#include "common/assert.h"
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/texture_cache/slot_vector.h"
namespace VideoCommon {
using AsyncJobId = SlotId;
static constexpr AsyncJobId NULL_ASYNC_JOB_ID{0};
template <class QueryCache, class HostCounter>
class CounterStreamBase {
public:
@@ -100,13 +93,9 @@ private:
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_)
: rasterizer{rasterizer_},
cpu_memory{cpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {
(void)slot_async_jobs.insert(); // Null value
}
explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_)
: rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {}
void InvalidateRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex};
@@ -137,15 +126,10 @@ public:
query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
}
auto result = query->BindCounter(Stream(type).Current(), timestamp);
if (result) {
auto async_job_id = query->GetAsyncJob();
auto& async_job = slot_async_jobs[async_job_id];
async_job.collected = true;
async_job.value = *result;
query->SetAsyncJob(NULL_ASYNC_JOB_ID);
query->BindCounter(Stream(type).Current(), timestamp);
if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
AsyncFlushQuery(*cpu_addr);
}
AsyncFlushQuery(query, timestamp, lock);
}
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
@@ -189,18 +173,15 @@ public:
}
void CommitAsyncFlushes() {
std::unique_lock lock{mutex};
committed_flushes.push_back(uncommitted_flushes);
uncommitted_flushes.reset();
}
bool HasUncommittedFlushes() const {
std::unique_lock lock{mutex};
return uncommitted_flushes != nullptr;
}
bool ShouldWaitAsyncFlushes() const {
std::unique_lock lock{mutex};
if (committed_flushes.empty()) {
return false;
}
@@ -208,7 +189,6 @@ public:
}
void PopAsyncFlushes() {
std::unique_lock lock{mutex};
if (committed_flushes.empty()) {
return;
}
@@ -217,25 +197,15 @@ public:
committed_flushes.pop_front();
return;
}
for (AsyncJobId async_job_id : *flush_list) {
AsyncJob& async_job = slot_async_jobs[async_job_id];
if (!async_job.collected) {
FlushAndRemoveRegion(async_job.query_location, 2, true);
}
for (VAddr query_address : *flush_list) {
FlushAndRemoveRegion(query_address, 4);
}
committed_flushes.pop_front();
}
private:
struct AsyncJob {
bool collected = false;
u64 value = 0;
VAddr query_location = 0;
std::optional<u64> timestamp{};
};
/// Flushes a memory range to guest memory and removes it from the cache.
void FlushAndRemoveRegion(VAddr addr, std::size_t size, bool async = false) {
void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
const u64 addr_begin = addr;
const u64 addr_end = addr_begin + size;
const auto in_range = [addr_begin, addr_end](const CachedQuery& query) {
@@ -256,16 +226,7 @@ private:
continue;
}
rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
AsyncJobId async_job_id = query.GetAsyncJob();
auto flush_result = query.Flush(async);
if (async_job_id == NULL_ASYNC_JOB_ID) {
ASSERT_MSG(false, "This should not be reachable at all");
continue;
}
AsyncJob& async_job = slot_async_jobs[async_job_id];
async_job.collected = true;
async_job.value = flush_result;
query.SetAsyncJob(NULL_ASYNC_JOB_ID);
query.Flush();
}
std::erase_if(contents, in_range);
}
@@ -292,60 +253,26 @@ private:
return found != std::end(contents) ? &*found : nullptr;
}
void AsyncFlushQuery(CachedQuery* query, std::optional<u64> timestamp,
std::unique_lock<std::recursive_mutex>& lock) {
const AsyncJobId new_async_job_id = slot_async_jobs.insert();
{
AsyncJob& async_job = slot_async_jobs[new_async_job_id];
query->SetAsyncJob(new_async_job_id);
async_job.query_location = query->GetCpuAddr();
async_job.collected = false;
if (!uncommitted_flushes) {
uncommitted_flushes = std::make_shared<std::vector<AsyncJobId>>();
}
uncommitted_flushes->push_back(new_async_job_id);
void AsyncFlushQuery(VAddr addr) {
if (!uncommitted_flushes) {
uncommitted_flushes = std::make_shared<std::vector<VAddr>>();
}
lock.unlock();
std::function<void()> operation([this, new_async_job_id, timestamp] {
std::unique_lock local_lock{mutex};
AsyncJob& async_job = slot_async_jobs[new_async_job_id];
u64 value = async_job.value;
VAddr address = async_job.query_location;
slot_async_jobs.erase(new_async_job_id);
local_lock.unlock();
if (timestamp) {
u64 timestamp_value = *timestamp;
cpu_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64));
cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64));
rasterizer.InvalidateRegion(address, sizeof(u64) * 2,
VideoCommon::CacheType::NoQueryCache);
} else {
u32 small_value = static_cast<u32>(value);
cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32));
rasterizer.InvalidateRegion(address, sizeof(u32),
VideoCommon::CacheType::NoQueryCache);
}
});
rasterizer.SyncOperation(std::move(operation));
uncommitted_flushes->push_back(addr);
}
static constexpr std::uintptr_t YUZU_PAGESIZE = 4096;
static constexpr unsigned YUZU_PAGEBITS = 12;
SlotVector<AsyncJob> slot_async_jobs;
VideoCore::RasterizerInterface& rasterizer;
Core::Memory::Memory& cpu_memory;
mutable std::recursive_mutex mutex;
std::recursive_mutex mutex;
std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{};
std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes;
std::shared_ptr<std::vector<VAddr>> uncommitted_flushes{};
std::list<std::shared_ptr<std::vector<VAddr>>> committed_flushes;
};
template <class QueryCache, class HostCounter>
@@ -364,12 +291,12 @@ public:
virtual ~HostCounterBase() = default;
/// Returns the current value of the query.
u64 Query(bool async = false) {
u64 Query() {
if (result) {
return *result;
}
u64 value = BlockingQuery(async) + base_result;
u64 value = BlockingQuery() + base_result;
if (dependency) {
value += dependency->Query();
dependency = nullptr;
@@ -390,7 +317,7 @@ public:
protected:
/// Returns the value of query from the backend API blocking as needed.
virtual u64 BlockingQuery(bool async = false) const = 0;
virtual u64 BlockingQuery() const = 0;
private:
std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
@@ -413,33 +340,26 @@ public:
CachedQueryBase& operator=(const CachedQueryBase&) = delete;
/// Flushes the query to guest memory.
virtual u64 Flush(bool async = false) {
virtual void Flush() {
// When counter is nullptr it means that it's just been reset. We are supposed to write a
// zero in these cases.
const u64 value = counter ? counter->Query(async) : 0;
if (async) {
return value;
}
const u64 value = counter ? counter->Query() : 0;
std::memcpy(host_ptr, &value, sizeof(u64));
if (timestamp) {
std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
}
return value;
}
/// Binds a counter to this query.
std::optional<u64> BindCounter(std::shared_ptr<HostCounter> counter_,
std::optional<u64> timestamp_) {
std::optional<u64> result{};
void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
if (counter) {
// If there's an old counter set it means the query is being rewritten by the game.
// To avoid losing the data forever, flush here.
result = std::make_optional(Flush());
Flush();
}
counter = std::move(counter_);
timestamp = timestamp_;
return result;
}
VAddr GetCpuAddr() const noexcept {
@@ -454,14 +374,6 @@ public:
return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
}
void SetAsyncJob(AsyncJobId assigned_async_job_) {
assigned_async_job = assigned_async_job_;
}
AsyncJobId GetAsyncJob() const {
return assigned_async_job;
}
protected:
/// Returns true when querying the counter may potentially block.
bool WaitPending() const noexcept {
@@ -477,7 +389,6 @@ private:
u8* host_ptr; ///< Writable host pointer.
std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
AsyncJobId assigned_async_job;
};
} // namespace VideoCommon

View File

@@ -30,17 +30,7 @@ private:
};
using Fence = std::shared_ptr<GLInnerFence>;
struct FenceManagerParams {
using FenceType = Fence;
using BufferCacheType = BufferCache;
using TextureCacheType = TextureCache;
using QueryCacheType = QueryCache;
static constexpr bool HAS_ASYNC_CHECK = false;
};
using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:

View File

@@ -26,8 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
: QueryCacheBase(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {}
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
: QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
QueryCache::~QueryCache() = default;
@@ -74,7 +74,7 @@ void HostCounter::EndQuery() {
glEndQuery(GetTarget(type));
}
u64 HostCounter::BlockingQuery([[maybe_unused]] bool async) const {
u64 HostCounter::BlockingQuery() const {
GLint64 value;
glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
return static_cast<u64>(value);
@@ -96,7 +96,7 @@ CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
return *this;
}
u64 CachedQuery::Flush([[maybe_unused]] bool async) {
void CachedQuery::Flush() {
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
// To avoid this disable and re-enable keeping the dependency stream.
// But we only have to do this if we have pending waits to be done.
@@ -106,13 +106,11 @@ u64 CachedQuery::Flush([[maybe_unused]] bool async) {
stream.Update(false);
}
auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
VideoCommon::CachedQueryBase<HostCounter>::Flush();
if (slice_counter) {
stream.Update(true);
}
return result;
}
} // namespace OpenGL

View File

@@ -28,7 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_);
explicit QueryCache(RasterizerOpenGL& rasterizer_);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
@@ -51,7 +51,7 @@ public:
void EndQuery();
private:
u64 BlockingQuery(bool async = false) const override;
u64 BlockingQuery() const override;
QueryCache& cache;
const VideoCore::QueryType type;
@@ -70,7 +70,7 @@ public:
CachedQuery(const CachedQuery&) = delete;
CachedQuery& operator=(const CachedQuery&) = delete;
u64 Flush(bool async = false) override;
void Flush() override;
private:
QueryCache* cache;

View File

@@ -63,7 +63,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
state_tracker, gpu.ShaderNotify()),
query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache),
query_cache(*this), accelerate_dma(buffer_cache, texture_cache),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
blit_image(program_manager_) {}
@@ -1287,8 +1287,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
}
const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
: VideoCommon::ObtainBufferOperation::MarkAsWritten;
const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
const auto [buffer, offset] =
buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
@@ -1299,7 +1298,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
if constexpr (IS_IMAGE_UPLOAD) {
image->UploadMemory(buffer->Handle(), offset, copy_span);
} else {
image->DownloadMemory(buffer->Handle(), offset, copy_span);
texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
buffer_operand.address, buffer_size);
}
return true;
}

View File

@@ -803,30 +803,40 @@ void Image::UploadMemory(const ImageBufferMap& map,
void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies) {
std::array buffer_handles{buffer_handle};
std::array buffer_offsets{buffer_offset};
DownloadMemory(buffer_handles, buffer_offsets, copies);
}
void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> buffer_offsets,
std::span<const VideoCommon::BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown();
}
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
for (size_t i = 0; i < buffer_handles.size(); i++) {
auto& buffer_handle = buffer_handles[i];
glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
u32 current_row_length = std::numeric_limits<u32>::max();
u32 current_image_height = std::numeric_limits<u32>::max();
u32 current_row_length = std::numeric_limits<u32>::max();
u32 current_image_height = std::numeric_limits<u32>::max();
for (const VideoCommon::BufferImageCopy& copy : copies) {
if (copy.image_subresource.base_level >= gl_num_levels) {
continue;
for (const VideoCommon::BufferImageCopy& copy : copies) {
if (copy.image_subresource.base_level >= gl_num_levels) {
continue;
}
if (current_row_length != copy.buffer_row_length) {
current_row_length = copy.buffer_row_length;
glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
}
if (current_image_height != copy.buffer_image_height) {
current_image_height = copy.buffer_image_height;
glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
}
CopyImageToBuffer(copy, buffer_offsets[i]);
}
if (current_row_length != copy.buffer_row_length) {
current_row_length = copy.buffer_row_length;
glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
}
if (current_image_height != copy.buffer_image_height) {
current_image_height = copy.buffer_image_height;
glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
}
CopyImageToBuffer(copy, buffer_offset);
}
if (is_rescaled) {
ScaleUp(true);

View File

@@ -215,6 +215,9 @@ public:
void DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept;
@@ -376,6 +379,7 @@ struct TextureCacheParams {
using Sampler = OpenGL::Sampler;
using Framebuffer = OpenGL::Framebuffer;
using AsyncBuffer = u32;
using BufferType = GLuint;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;

View File

@@ -5,7 +5,6 @@
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"

View File

@@ -40,16 +40,7 @@ private:
};
using Fence = std::shared_ptr<InnerFence>;
struct FenceManagerParams {
using FenceType = Fence;
using BufferCacheType = BufferCache;
using TextureCacheType = TextureCache;
using QueryCacheType = QueryCache;
static constexpr bool HAS_ASYNC_CHECK = true;
};
using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
class FenceManager final : public GenericFenceManager {
public:

View File

@@ -696,6 +696,13 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
PipelineStatistics* statistics, bool build_in_parallel) try {
// TODO: Remove this when Intel fixes their shader compiler.
// https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159
if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash());
return nullptr;
}
LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};

View File

@@ -66,10 +66,9 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
}
}
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
Scheduler& scheduler_)
: QueryCacheBase{rasterizer_, cpu_memory_}, device{device_}, scheduler{scheduler_},
: QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
query_pools{
QueryPool{device_, scheduler_, QueryType::SamplesPassed},
} {}
@@ -99,10 +98,8 @@ HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> depend
query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
const vk::Device* logical = &cache.GetDevice().GetLogical();
cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
const bool use_precise = Settings::IsGPULevelHigh();
logical->ResetQueryPool(query.first, query.second, 1);
cmdbuf.BeginQuery(query.first, query.second,
use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
});
}
@@ -115,10 +112,8 @@ void HostCounter::EndQuery() {
[query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
}
u64 HostCounter::BlockingQuery(bool async) const {
if (!async) {
cache.GetScheduler().Wait(tick);
}
u64 HostCounter::BlockingQuery() const {
cache.GetScheduler().Wait(tick);
u64 data;
const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
query.first, query.second, 1, sizeof(data), &data, sizeof(data),

View File

@@ -52,8 +52,7 @@ private:
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
Scheduler& scheduler_);
~QueryCache();
@@ -84,7 +83,7 @@ public:
void EndQuery();
private:
u64 BlockingQuery(bool async = false) const override;
u64 BlockingQuery() const override;
QueryCache& cache;
const VideoCore::QueryType type;

View File

@@ -172,8 +172,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
query_cache{*this, cpu_memory_, device, scheduler},
accelerate_dma(buffer_cache, texture_cache, scheduler),
query_cache{*this, device, scheduler}, accelerate_dma(buffer_cache, texture_cache, scheduler),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
wfi_event(device.GetLogical().CreateEvent()) {
scheduler.SetQueryCache(query_cache);
@@ -676,8 +675,7 @@ bool RasterizerVulkan::AccelerateConditionalRendering() {
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
Maxwell::ReportSemaphore::Compare cmp;
if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
VideoCommon::CacheType::BufferCache |
VideoCommon::CacheType::QueryCache)) {
VideoCommon::CacheType::BufferCache)) {
return true;
}
return false;
@@ -783,8 +781,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
}
const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
: VideoCommon::ObtainBufferOperation::MarkAsWritten;
const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
const auto [buffer, offset] =
buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
@@ -795,7 +792,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
if constexpr (IS_IMAGE_UPLOAD) {
image->UploadMemory(buffer->Handle(), offset, copy_span);
} else {
image->DownloadMemory(buffer->Handle(), offset, copy_span);
texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
buffer_operand.address, buffer_size);
}
return true;
}

View File

@@ -65,6 +65,18 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
return extent;
}
/// Picks the composite alpha mode used when creating the swapchain.
///
/// Opaque is preferred, then inherit, which keeps the previous selection order. If the surface
/// advertises neither, any other known mode it does advertise is used instead of an unsupported
/// one, because VkSwapchainCreateInfoKHR::compositeAlpha has to be one of the bits present in
/// VkSurfaceCapabilitiesKHR::supportedCompositeAlpha.
///
/// @param capabilities Surface capabilities queried for the target surface.
/// @return The selected mode. Opaque is returned as a last resort, after logging an error, when
///         none of the known bits is advertised.
VkCompositeAlphaFlagBitsKHR ChooseAlphaFlags(const VkSurfaceCapabilitiesKHR& capabilities) {
    // Ordered by preference; the first two entries reproduce the original behaviour.
    static constexpr VkCompositeAlphaFlagBitsKHR preference_order[] = {
        VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
        VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR,
        VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR,
        VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR,
    };
    for (const VkCompositeAlphaFlagBitsKHR candidate : preference_order) {
        if ((capabilities.supportedCompositeAlpha & candidate) != 0) {
            return candidate;
        }
    }
    // Nothing recognisable was advertised: report it and keep the historical fallback value.
    LOG_ERROR(Render_Vulkan, "Unknown composite alpha flags value {:#x}",
              capabilities.supportedCompositeAlpha);
    return VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
}
} // Anonymous namespace
Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_,
@@ -155,6 +167,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)};
const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
present_mode = ChooseSwapPresentMode(present_modes);
@@ -185,7 +198,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.preTransform = capabilities.currentTransform,
.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
.compositeAlpha = alpha_flags,
.presentMode = present_mode,
.clipped = VK_FALSE,
.oldSwapchain = nullptr,

View File

@@ -1,10 +1,11 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-License-Identifier: GPL-3.0-or-later
#include <algorithm>
#include <array>
#include <span>
#include <vector>
#include <boost/container/small_vector.hpp>
#include "common/bit_cast.h"
#include "common/bit_util.h"
@@ -1343,14 +1344,31 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag
/// Single-buffer convenience overload: wraps the buffer handle and its offset into one-element
/// arrays and forwards them, together with the copies, to the span-based DownloadMemory overload.
void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
                           std::span<const VideoCommon::BufferImageCopy> copies) {
    std::array<VkBuffer, 1> single_buffer{buffer};
    std::array<VkDeviceSize, 1> single_offset{offset};
    DownloadMemory(single_buffer, single_offset, copies);
}
void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceSize> offsets_span,
std::span<const VideoCommon::BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown();
}
std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
boost::container::small_vector<VkBuffer, 1> buffers_vector{};
boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies;
for (size_t index = 0; index < buffers_span.size(); index++) {
buffers_vector.emplace_back(buffers_span[index]);
vk_copies.emplace_back(
TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
}
scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([buffer, image = *original_image, aspect_mask = aspect_mask,
vk_copies](vk::CommandBuffer cmdbuf) {
scheduler->Record([buffers = std::move(buffers_vector), image = *original_image,
aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -1369,6 +1387,20 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
for (size_t index = 0; index < buffers.size(); index++) {
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
vk_copies[index]);
}
const VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
const VkImageMemoryBarrier image_write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -1387,15 +1419,6 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
});
@@ -1405,7 +1428,13 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
}
void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
DownloadMemory(map.buffer, map.offset, copies);
std::array buffers{
map.buffer,
};
std::array offsets{
map.offset,
};
DownloadMemory(buffers, offsets, copies);
}
bool Image::IsRescaled() const noexcept {

View File

@@ -1,5 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
@@ -141,6 +141,9 @@ public:
void DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(std::span<VkBuffer> buffers, std::span<VkDeviceSize> offsets,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
@@ -371,6 +374,7 @@ struct TextureCacheParams {
using Sampler = Vulkan::Sampler;
using Framebuffer = Vulkan::Framebuffer;
using AsyncBuffer = Vulkan::StagingBufferRef;
using BufferType = VkBuffer;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;

View File

@@ -1,9 +1,10 @@
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <unordered_set>
#include <boost/container/small_vector.hpp>
#include "common/alignment.h"
#include "common/settings.h"
@@ -17,15 +18,10 @@
namespace VideoCommon {
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TextureType;
using Tegra::Texture::TICEntry;
using Tegra::Texture::TSCEntry;
using VideoCore::Surface::GetFormatType;
using VideoCore::Surface::IsCopyCompatible;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::SurfaceType;
using namespace Common::Literals;
@@ -143,6 +139,13 @@ void TextureCache<P>::TickFrame() {
runtime.TickFrame();
critical_gc = 0;
++frame_tick;
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
for (auto& buffer : async_buffers_death_ring) {
runtime.FreeDeferredStagingBuffer(buffer);
}
async_buffers_death_ring.clear();
}
}
template <class P>
@@ -661,25 +664,39 @@ template <class P>
void TextureCache<P>::CommitAsyncFlushes() {
// This is intentionally passing the value by copy
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
const std::span<const ImageId> download_ids = uncommitted_downloads;
auto& download_ids = uncommitted_downloads;
if (download_ids.empty()) {
committed_downloads.emplace_back(std::move(uncommitted_downloads));
uncommitted_downloads.clear();
async_buffers.emplace_back(std::optional<AsyncBuffer>{});
async_buffers.emplace_back(std::move(uncommitted_async_buffers));
uncommitted_async_buffers.clear();
return;
}
size_t total_size_bytes = 0;
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
size_t last_async_buffer_id = uncommitted_async_buffers.size();
bool any_none_dma = false;
for (PendingDownload& download_info : download_ids) {
if (download_info.is_swizzle) {
total_size_bytes +=
Common::AlignUp(slot_images[download_info.object_id].unswizzled_size_bytes, 64);
any_none_dma = true;
download_info.async_buffer_id = last_async_buffer_id;
}
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
if (any_none_dma) {
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
for (const PendingDownload& download_info : download_ids) {
if (download_info.is_swizzle) {
Image& image = slot_images[download_info.object_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
}
}
uncommitted_async_buffers.emplace_back(download_map);
}
async_buffers.emplace_back(download_map);
async_buffers.emplace_back(std::move(uncommitted_async_buffers));
uncommitted_async_buffers.clear();
}
committed_downloads.emplace_back(std::move(uncommitted_downloads));
uncommitted_downloads.clear();
@@ -691,39 +708,57 @@ void TextureCache<P>::PopAsyncFlushes() {
return;
}
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
const std::span<const ImageId> download_ids = committed_downloads.front();
const auto& download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop_front();
async_buffers.pop_front();
return;
}
auto download_map = *async_buffers.front();
std::span<u8> download_span = download_map.mapped_span;
auto download_map = std::move(async_buffers.front());
for (size_t i = download_ids.size(); i > 0; i--) {
const ImageBase& image = slot_images[download_ids[i - 1]];
const auto copies = FullDownloadCopies(image.info);
download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
std::span<u8> download_span_alt = download_span.subspan(download_map.offset);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt,
swizzle_data_buffer);
auto& download_info = download_ids[i - 1];
auto& download_buffer = download_map[download_info.async_buffer_id];
if (download_info.is_swizzle) {
const ImageBase& image = slot_images[download_info.object_id];
const auto copies = FullDownloadCopies(image.info);
download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
std::span<u8> download_span =
download_buffer.mapped_span.subspan(download_buffer.offset);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
swizzle_data_buffer);
} else {
const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id];
std::span<u8> download_span =
download_buffer.mapped_span.subspan(download_buffer.offset);
gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(),
buffer_info.size);
slot_buffer_downloads.erase(download_info.object_id);
}
}
for (auto& download_buffer : download_map) {
async_buffers_death_ring.emplace_back(download_buffer);
}
runtime.FreeDeferredStagingBuffer(download_map);
committed_downloads.pop_front();
async_buffers.pop_front();
} else {
const std::span<const ImageId> download_ids = committed_downloads.front();
const auto& download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop_front();
return;
}
size_t total_size_bytes = 0;
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
for (const PendingDownload& download_info : download_ids) {
if (download_info.is_swizzle) {
total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes;
}
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
const size_t original_offset = download_map.offset;
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
for (const PendingDownload& download_info : download_ids) {
if (!download_info.is_swizzle) {
continue;
}
Image& image = slot_images[download_info.object_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += image.unswizzled_size_bytes;
@@ -732,8 +767,11 @@ void TextureCache<P>::PopAsyncFlushes() {
runtime.Finish();
download_map.offset = original_offset;
std::span<u8> download_span = download_map.mapped_span;
for (const ImageId image_id : download_ids) {
const ImageBase& image = slot_images[image_id];
for (const PendingDownload& download_info : download_ids) {
if (!download_info.is_swizzle) {
continue;
}
const ImageBase& image = slot_images[download_info.object_id];
const auto copies = FullDownloadCopies(image.info);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
swizzle_data_buffer);
@@ -833,6 +871,33 @@ std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::Dm
return {image, copy};
}
/// Downloads an image into a caller-provided buffer.
///
/// Without async download support the image is simply downloaded into `buffer` at
/// `buffer_offset`. With async download support the destination region (`address`, `size`) is
/// stored in slot_buffer_downloads, a non-swizzle PendingDownload is queued, a staging buffer of
/// `size` bytes is requested from the runtime, and a single DownloadMemory call writes the image
/// into both the caller's buffer and that staging buffer.
template <class P>
void TextureCache<P>::DownloadImageIntoBuffer(typename TextureCache<P>::Image* image,
                                              typename TextureCache<P>::BufferType buffer,
                                              size_t buffer_offset,
                                              std::span<const VideoCommon::BufferImageCopy> copies,
                                              GPUVAddr address, size_t size) {
    if constexpr (!IMPLEMENTS_ASYNC_DOWNLOADS) {
        image->DownloadMemory(buffer, buffer_offset, copies);
    } else {
        const BufferDownload pending_region{address, size};
        auto region_slot = slot_buffer_downloads.insert(pending_region);

        // The index is read before the staging buffer is appended below, so it names that buffer.
        uncommitted_downloads.emplace_back(
            PendingDownload{false, uncommitted_async_buffers.size(), region_slot});

        auto staging = runtime.DownloadStagingBuffer(size, true);
        uncommitted_async_buffers.emplace_back(staging);

        // First target is the caller's buffer, second is the staging copy.
        std::array targets{buffer, staging.buffer};
        std::array target_offsets{buffer_offset, staging.offset};
        image->DownloadMemory(targets, target_offsets, copies);
    }
}
template <class P>
void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
if (False(image.flags & ImageFlagBits::CpuModified)) {
@@ -2209,7 +2274,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
if (new_id) {
const ImageViewBase& old_view = slot_image_views[new_id];
if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
uncommitted_downloads.push_back(old_view.image_id);
const PendingDownload new_download{true, 0, old_view.image_id};
uncommitted_downloads.emplace_back(new_download);
}
}
*old_id = new_id;

View File

@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
@@ -40,14 +40,9 @@ struct ChannelState;
namespace VideoCommon {
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TICEntry;
using Tegra::Texture::TSCEntry;
using VideoCore::Surface::GetFormatType;
using VideoCore::Surface::IsCopyCompatible;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using namespace Common::Literals;
struct ImageViewInOut {
@@ -119,6 +114,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
using Sampler = typename P::Sampler;
using Framebuffer = typename P::Framebuffer;
using AsyncBuffer = typename P::AsyncBuffer;
using BufferType = typename P::BufferType;
struct BlitImages {
ImageId dst_id;
@@ -215,6 +211,10 @@ public:
const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand,
const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image);
void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies,
GPUVAddr address = 0, size_t size = 0);
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
@@ -424,17 +424,32 @@ private:
u64 critical_memory;
size_t critical_gc;
/// Destination of a pending buffer download: the address/size pair that PopAsyncFlushes passes
/// to gpu_memory->WriteBlockUnsafe once the staging data is available.
struct BufferDownload {
GPUVAddr address; ///< GPU virtual address the downloaded bytes are written to
size_t size;      ///< Number of bytes written at that address
};
/// One queued download entry; covers both image downloads and buffer downloads.
struct PendingDownload {
/// true: object_id indexes slot_images and the data is swizzled back into guest memory.
/// false: object_id indexes slot_buffer_downloads and the data is written back as a plain block.
bool is_swizzle;
/// Index of the staging buffer used by this entry within the async buffer list of its commit.
size_t async_buffer_id;
/// Slot of the image or of the BufferDownload, depending on is_swizzle.
SlotId object_id;
};
SlotVector<Image> slot_images;
SlotVector<ImageMapView> slot_map_views;
SlotVector<ImageView> slot_image_views;
SlotVector<ImageAlloc> slot_image_allocs;
SlotVector<Sampler> slot_samplers;
SlotVector<Framebuffer> slot_framebuffers;
SlotVector<BufferDownload> slot_buffer_downloads;
// TODO: This data structure is not optimal and it should be reworked
std::vector<ImageId> uncommitted_downloads;
std::deque<std::vector<ImageId>> committed_downloads;
std::deque<std::optional<AsyncBuffer>> async_buffers;
std::vector<PendingDownload> uncommitted_downloads;
std::deque<std::vector<PendingDownload>> committed_downloads;
std::vector<AsyncBuffer> uncommitted_async_buffers;
std::deque<std::vector<AsyncBuffer>> async_buffers;
std::deque<AsyncBuffer> async_buffers_death_ring;
struct LRUItemParams {
using ObjectType = ImageId;

View File

@@ -27,6 +27,7 @@
#include "configuration/configure_input.h"
#include "configuration/configure_per_game.h"
#include "configuration/configure_tas.h"
#include "core/file_sys/romfs_factory.h"
#include "core/file_sys/vfs.h"
#include "core/file_sys/vfs_real.h"
#include "core/frontend/applets/cabinet.h"
@@ -4171,6 +4172,8 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
}
Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance();
bool all_keys_present{true};
if (keys.BaseDeriveNecessary()) {
Core::Crypto::PartitionDataManager pdm{vfs->OpenDirectory("", FileSys::Mode::Read)};
@@ -4195,6 +4198,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
errors += tr(" - Missing PRODINFO");
}
if (!errors.isEmpty()) {
all_keys_present = false;
QMessageBox::warning(
this, tr("Derivation Components Missing"),
tr("Encryption keys are missing. "
@@ -4222,11 +4226,40 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
system->GetFileSystemController().CreateFactories(*vfs);
if (all_keys_present && !this->CheckSystemArchiveDecryption()) {
LOG_WARNING(Frontend, "Mii model decryption failed");
QMessageBox::warning(
this, tr("System Archive Decryption Failed"),
tr("Encryption keys failed to decrypt firmware. "
"<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the yuzu "
"quickstart guide</a> to get all your keys, firmware and "
"games."));
}
if (behavior == ReinitializeKeyBehavior::Warning) {
game_list->PopulateAsync(UISettings::values.game_dirs);
}
}
bool GMainWindow::CheckSystemArchiveDecryption() {
constexpr u64 MiiModelId = 0x0100000000000802;
auto bis_system = system->GetFileSystemController().GetSystemNANDContents();
if (!bis_system) {
// Not having system BIS files is not an error.
return true;
}
auto mii_nca = bis_system->GetEntry(MiiModelId, FileSys::ContentRecordType::Data);
if (!mii_nca) {
// Not having the Mii model is not an error.
return true;
}
// Return whether we are able to decrypt the RomFS of the Mii model.
return mii_nca->GetRomFS().get() != nullptr;
}
std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed,
u64 program_id) {
const auto dlc_entries =

View File

@@ -392,6 +392,7 @@ private:
void LoadTranslation();
void OpenPerGameConfiguration(u64 title_id, const std::string& file_name);
bool CheckDarkMode();
bool CheckSystemArchiveDecryption();
QString GetTasStateDescription() const;
bool CreateShortcut(const std::string& shortcut_path, const std::string& title,