Compare commits


15 Commits

Author SHA1 Message Date
Kelebek1
eebf6e6770 Rework GC to be based on available card memory rather than hard-coded values, also skip non-gpu modified images from download. 2023-05-14 23:24:15 +01:00
liamwhite
e9069dfe76 Merge pull request #10288 from liamwhite/vram-limits
vulkan_device: reserve extra memory to prevent swaps
2023-05-14 17:02:15 -04:00
Liam
2be751100b vulkan_device: reserve extra memory to prevent swaps 2023-05-14 16:49:59 -04:00
bunnei
29c7176f55 Merge pull request #10286 from liamwhite/compatible-bits
vulkan_common: fix incompatible property flags
2023-05-14 01:23:29 -07:00
Liam
122435e080 vulkan_common: fix incompatible property flags 2023-05-14 01:13:11 -04:00
Fernando S
9c739f1506 Merge pull request #10244 from liamwhite/lower-upper
time: implement ContinuousAdjustmentTimePoint
2023-05-13 03:51:05 +02:00
Fernando S
075d73f076 Merge pull request #10243 from Kelebek1/red_dot
Correctly track render target index in the framebuffer for image aspects
2023-05-13 03:50:31 +02:00
bunnei
021e503cc8 Merge pull request #10237 from liamwhite/cache-storage
fs: stub cache storage
2023-05-12 16:42:17 -07:00
bunnei
1805de0301 Merge pull request #10236 from liamwhite/thats-not-an-ibinder
nvnflinger: fix Parcel serialization
2023-05-12 16:07:35 -07:00
Kelebek1
cd0ded7771 Correctly track RT indexes for image aspect lookup during clears 2023-05-12 01:40:21 +01:00
Liam
351079a4ba fs: adjust future save path 2023-05-11 17:30:30 -04:00
Liam
62bcb99ba8 am: stub CreateCacheStorage 2023-05-11 17:26:02 -04:00
Liam
13e4ceb990 fs: stub cache storage and fix params alignment 2023-05-11 17:23:28 -04:00
Liam
bb94beed15 nvnflinger: fix Parcel serialization 2023-05-11 17:09:19 -04:00
Liam
6e10a0c130 nvnflinger: fix producer slot fence init 2023-05-11 17:08:14 -04:00
18 changed files with 302 additions and 156 deletions

View File

@@ -82,9 +82,9 @@ std::string GetFutureSaveDataPath(SaveDataSpaceId space_id, SaveDataType type, u
     // Only detect account/device saves from the future location.
     switch (type) {
     case SaveDataType::SaveData:
-        return fmt::format("{}/account/{}/{:016X}/1", space_id_path, uuid.RawString(), title_id);
+        return fmt::format("{}/account/{}/{:016X}/0", space_id_path, uuid.RawString(), title_id);
     case SaveDataType::DeviceSaveData:
-        return fmt::format("{}/device/{:016X}/1", space_id_path, title_id);
+        return fmt::format("{}/device/{:016X}/0", space_id_path, title_id);
     default:
         return "";
     }
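For reference, a standalone sketch of what the corrected format string produces; the UUID, base path, and title id below are made up for illustration:

#include <cstdint>
#include <fmt/format.h>

int main() {
    const std::uint64_t title_id = 0x0100000000010000ULL;          // hypothetical title id
    const char* uuid = "00000000000000000000000000000001";         // hypothetical account UUID
    // {:016X} zero-pads the title id to 16 hex digits; the trailing "/0" is the
    // save index the adjusted future path now uses instead of "/1".
    fmt::print("{}\n", fmt::format("save/account/{}/{:016X}/0", uuid, title_id));
    // Prints: save/account/00000000000000000000000000000001/0100000000010000/0
}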

View File

@@ -13,6 +13,7 @@
#include "core/file_sys/savedata_factory.h"
#include "core/hle/kernel/k_event.h"
#include "core/hle/kernel/k_transfer_memory.h"
#include "core/hle/result.h"
#include "core/hle/service/acc/profile_manager.h"
#include "core/hle/service/am/am.h"
#include "core/hle/service/am/applet_ae.h"
@@ -1335,7 +1336,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
{24, nullptr, "GetLaunchStorageInfoForDebug"},
{25, &IApplicationFunctions::ExtendSaveData, "ExtendSaveData"},
{26, &IApplicationFunctions::GetSaveDataSize, "GetSaveDataSize"},
{27, nullptr, "CreateCacheStorage"},
{27, &IApplicationFunctions::CreateCacheStorage, "CreateCacheStorage"},
{28, nullptr, "GetSaveDataSizeMax"},
{29, nullptr, "GetCacheStorageMax"},
{30, &IApplicationFunctions::BeginBlockingHomeButtonShortAndLongPressed, "BeginBlockingHomeButtonShortAndLongPressed"},
@@ -1738,6 +1739,36 @@ void IApplicationFunctions::GetSaveDataSize(HLERequestContext& ctx) {
     rb.Push(size.journal);
 }
 
+void IApplicationFunctions::CreateCacheStorage(HLERequestContext& ctx) {
+    struct InputParameters {
+        u16 index;
+        s64 size;
+        s64 journal_size;
+    };
+    static_assert(sizeof(InputParameters) == 24);
+
+    struct OutputParameters {
+        u32 storage_target;
+        u64 required_size;
+    };
+    static_assert(sizeof(OutputParameters) == 16);
+
+    IPC::RequestParser rp{ctx};
+    const auto params = rp.PopRaw<InputParameters>();
+
+    LOG_WARNING(Service_AM, "(STUBBED) called with index={}, size={:#x}, journal_size={:#x}",
+                params.index, params.size, params.journal_size);
+
+    const OutputParameters resp{
+        .storage_target = 1,
+        .required_size = 0,
+    };
+
+    IPC::ResponseBuilder rb{ctx, 6};
+    rb.Push(ResultSuccess);
+    rb.PushRaw(resp);
+}
+
 void IApplicationFunctions::QueryApplicationPlayStatistics(HLERequestContext& ctx) {
     LOG_WARNING(Service_AM, "(STUBBED) called");
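The static_assert values follow from natural alignment: in InputParameters the u16 index is followed by 6 bytes of padding so the first s64 lands on an 8-byte boundary, and in OutputParameters the u32 is padded out to the 8-byte alignment of the u64. A standalone sketch of the same layout, with field names copied from the stub above:

#include <cstddef>
#include <cstdint>

struct InputParameters {
    std::uint16_t index;       // offset 0, then 6 padding bytes
    std::int64_t size;         // offset 8
    std::int64_t journal_size; // offset 16
};
static_assert(offsetof(InputParameters, size) == 8);
static_assert(offsetof(InputParameters, journal_size) == 16);
static_assert(sizeof(InputParameters) == 24);

struct OutputParameters {
    std::uint32_t storage_target; // offset 0, then 4 padding bytes
    std::uint64_t required_size;  // offset 8
};
static_assert(sizeof(OutputParameters) == 16);

int main() {}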

View File

@@ -333,6 +333,7 @@ private:
     void GetPseudoDeviceId(HLERequestContext& ctx);
     void ExtendSaveData(HLERequestContext& ctx);
     void GetSaveDataSize(HLERequestContext& ctx);
+    void CreateCacheStorage(HLERequestContext& ctx);
     void BeginBlockingHomeButtonShortAndLongPressed(HLERequestContext& ctx);
     void EndBlockingHomeButtonShortAndLongPressed(HLERequestContext& ctx);
     void BeginBlockingHomeButton(HLERequestContext& ctx);

View File

@@ -24,8 +24,10 @@
#include "core/file_sys/savedata_factory.h"
#include "core/file_sys/system_archive/system_archive.h"
#include "core/file_sys/vfs.h"
#include "core/hle/result.h"
#include "core/hle/service/filesystem/filesystem.h"
#include "core/hle/service/filesystem/fsp_srv.h"
#include "core/hle/service/hle_ipc.h"
#include "core/hle/service/ipc_helpers.h"
#include "core/reporter.h"
@@ -552,9 +554,9 @@ public:
         // Write the data to memory
         ctx.WriteBuffer(begin, range_size);
 
-        IPC::ResponseBuilder rb{ctx, 3};
+        IPC::ResponseBuilder rb{ctx, 4};
         rb.Push(ResultSuccess);
-        rb.Push<u32>(static_cast<u32>(actual_entries));
+        rb.Push<u64>(actual_entries);
     }
 
 private:
@@ -712,7 +714,7 @@ FSP_SRV::FSP_SRV(Core::System& system_)
         {59, nullptr, "WriteSaveDataFileSystemExtraData"},
         {60, nullptr, "OpenSaveDataInfoReader"},
         {61, &FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId, "OpenSaveDataInfoReaderBySaveDataSpaceId"},
-        {62, nullptr, "OpenCacheStorageList"},
+        {62, &FSP_SRV::OpenSaveDataInfoReaderOnlyCacheStorage, "OpenSaveDataInfoReaderOnlyCacheStorage"},
         {64, nullptr, "OpenSaveDataInternalStorageFileSystem"},
         {65, nullptr, "UpdateSaveDataMacForDebug"},
         {66, nullptr, "WriteSaveDataFileSystemExtraData2"},
@@ -921,6 +923,15 @@ void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(HLERequestContext& ctx) {
         std::make_shared<ISaveDataInfoReader>(system, space, fsc));
 }
 
+void FSP_SRV::OpenSaveDataInfoReaderOnlyCacheStorage(HLERequestContext& ctx) {
+    LOG_WARNING(Service_FS, "(STUBBED) called");
+
+    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+    rb.Push(ResultSuccess);
+    rb.PushIpcInterface<ISaveDataInfoReader>(system, FileSys::SaveDataSpaceId::TemporaryStorage,
+                                             fsc);
+}
+
 void FSP_SRV::WriteSaveDataFileSystemExtraDataBySaveDataAttribute(HLERequestContext& ctx) {
     LOG_WARNING(Service_FS, "(STUBBED) called.");
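The ResponseBuilder bump from 3 to 4 pairs with the switch from Push&lt;u32&gt; to Push&lt;u64&gt;: judging from these diffs, the size argument counts the response payload in 32-bit words, so a Result occupies two words and a u64 two more (the new rb{ctx, 6} in CreateCacheStorage likewise fits two Result words plus the 16-byte OutputParameters). A sketch of the accounting, inferred from the diffs rather than from the IPC headers:

// IPC::ResponseBuilder rb{ctx, N};   // N = response size in u32 words (inferred)
// rb.Push(ResultSuccess);            // Result -> 2 words
// rb.Push<u64>(actual_entries);      // u64    -> 2 words, total 4 -> rb{ctx, 4}
// The old code pushed static_cast<u32>(actual_entries), 1 word, total 3 -> rb{ctx, 3};
// widening the entry count to u64 without resizing the builder would overflow it.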

View File

@@ -42,6 +42,7 @@ private:
     void OpenSaveDataFileSystem(HLERequestContext& ctx);
     void OpenReadOnlySaveDataFileSystem(HLERequestContext& ctx);
     void OpenSaveDataInfoReaderBySaveDataSpaceId(HLERequestContext& ctx);
+    void OpenSaveDataInfoReaderOnlyCacheStorage(HLERequestContext& ctx);
     void WriteSaveDataFileSystemExtraDataBySaveDataAttribute(HLERequestContext& ctx);
     void ReadSaveDataFileSystemExtraDataWithMaskBySaveDataAttribute(HLERequestContext& ctx);
     void OpenDataStorageByCurrentProcess(HLERequestContext& ctx);

View File

@@ -793,6 +793,7 @@ Status BufferQueueProducer::SetPreallocatedBuffer(s32 slot,
     std::scoped_lock lock{core->mutex};
 
     slots[slot] = {};
+    slots[slot].fence = Fence::NoFence();
     slots[slot].graphic_buffer = buffer;
     slots[slot].frame_number = 0;
@@ -854,7 +855,7 @@ void BufferQueueProducer::Transact(HLERequestContext& ctx, TransactionId code, u
         status = DequeueBuffer(&slot, &fence, is_async, width, height, pixel_format, usage);
 
         parcel_out.Write(slot);
-        parcel_out.WriteObject(&fence);
+        parcel_out.WriteFlattenedObject(&fence);
         break;
     }
     case TransactionId::RequestBuffer: {
@@ -864,7 +865,7 @@ void BufferQueueProducer::Transact(HLERequestContext& ctx, TransactionId code, u
         status = RequestBuffer(slot, &buf);
 
-        parcel_out.WriteObject(buf);
+        parcel_out.WriteFlattenedObject(buf);
         break;
     }
     case TransactionId::QueueBuffer: {

View File

@@ -117,61 +117,67 @@ private:
 class OutputParcel final {
 public:
-    static constexpr std::size_t DefaultBufferSize = 0x40;
-
-    OutputParcel() : buffer(DefaultBufferSize) {}
-
-    template <typename T>
-    explicit OutputParcel(const T& out_data) : buffer(DefaultBufferSize) {
-        Write(out_data);
-    }
+    OutputParcel() = default;
 
     template <typename T>
     void Write(const T& val) {
-        static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable.");
-        if (buffer.size() < write_index + sizeof(T)) {
-            buffer.resize(buffer.size() + sizeof(T) + DefaultBufferSize);
-        }
-        std::memcpy(buffer.data() + write_index, &val, sizeof(T));
-        write_index += sizeof(T);
-        write_index = Common::AlignUp(write_index, 4);
+        this->WriteImpl(val, m_data_buffer);
     }
 
     template <typename T>
-    void WriteObject(const T* ptr) {
-        static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable.");
+    void WriteFlattenedObject(const T* ptr) {
         if (!ptr) {
-            Write<u32>(0);
+            this->Write<u32>(0);
             return;
         }
-        Write<u32>(1);
-        Write<s64>(sizeof(T));
-        Write(*ptr);
+        this->Write<u32>(1);
+        this->Write<s64>(sizeof(T));
+        this->Write(*ptr);
     }
 
     template <typename T>
-    void WriteObject(const std::shared_ptr<T> ptr) {
-        WriteObject(ptr.get());
+    void WriteFlattenedObject(const std::shared_ptr<T> ptr) {
+        this->WriteFlattenedObject(ptr.get());
     }
 
+    template <typename T>
+    void WriteInterface(const T& val) {
+        this->WriteImpl(val, m_data_buffer);
+        this->WriteImpl(0U, m_object_buffer);
+    }
+
     std::vector<u8> Serialize() const {
-        ParcelHeader header{};
-        header.data_size = static_cast<u32>(write_index - sizeof(ParcelHeader));
-        header.data_offset = sizeof(ParcelHeader);
-        header.objects_size = 4;
-        header.objects_offset = static_cast<u32>(sizeof(ParcelHeader) + header.data_size);
-        std::memcpy(buffer.data(), &header, sizeof(ParcelHeader));
+        std::vector<u8> output_buffer(sizeof(ParcelHeader) + m_data_buffer.size() +
+                                      m_object_buffer.size());
 
-        return buffer;
+        ParcelHeader header{};
+        header.data_size = static_cast<u32>(m_data_buffer.size());
+        header.data_offset = sizeof(ParcelHeader);
+        header.objects_size = static_cast<u32>(m_object_buffer.size());
+        header.objects_offset = header.data_offset + header.data_size;
+        std::memcpy(output_buffer.data(), &header, sizeof(header));
+
+        std::ranges::copy(m_data_buffer, output_buffer.data() + header.data_offset);
+        std::ranges::copy(m_object_buffer, output_buffer.data() + header.objects_offset);
+
+        return output_buffer;
     }
 
 private:
-    mutable std::vector<u8> buffer;
-    std::size_t write_index = sizeof(ParcelHeader);
+    template <typename T>
+        requires(std::is_trivially_copyable_v<T>)
+    void WriteImpl(const T& val, std::vector<u8>& buffer) {
+        const size_t aligned_size = Common::AlignUp(sizeof(T), 4);
+        const size_t old_size = buffer.size();
+        buffer.resize(old_size + aligned_size);
+        std::memcpy(buffer.data() + old_size, &val, sizeof(T));
+    }
+
+private:
+    std::vector<u8> m_data_buffer;
+    std::vector<u8> m_object_buffer;
 };
 
 } // namespace Service::android
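The rewritten Serialize makes the wire layout explicit: a 16-byte ParcelHeader, then the data section, then the objects section (one 4-byte offset per WriteInterface call), so parcels with no objects now report objects_size = 0 instead of the old hard-coded 4. A minimal standalone sketch of the same layout math, assuming the u32 header fields implied by the casts above:

#include <cstdint>
#include <cstring>
#include <vector>

struct ParcelHeader {
    std::uint32_t data_size;
    std::uint32_t data_offset;
    std::uint32_t objects_size;
    std::uint32_t objects_offset;
};
static_assert(sizeof(ParcelHeader) == 16);

std::vector<std::uint8_t> Serialize(const std::vector<std::uint8_t>& data,
                                    const std::vector<std::uint8_t>& objects) {
    std::vector<std::uint8_t> out(sizeof(ParcelHeader) + data.size() + objects.size());
    ParcelHeader header{};
    header.data_size = static_cast<std::uint32_t>(data.size());
    header.data_offset = sizeof(ParcelHeader);
    header.objects_size = static_cast<std::uint32_t>(objects.size());
    header.objects_offset = header.data_offset + header.data_size;
    std::memcpy(out.data(), &header, sizeof(header));
    // Both sections are already 4-byte aligned because every write was padded.
    if (!data.empty()) {
        std::memcpy(out.data() + header.data_offset, data.data(), data.size());
    }
    if (!objects.empty()) {
        std::memcpy(out.data() + header.objects_offset, objects.data(), objects.size());
    }
    return out;
}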

View File

@@ -64,8 +64,8 @@ public:
 private:
     const u32 magic = 2;
     const u32 process_id = 1;
-    const u32 id;
-    INSERT_PADDING_WORDS(3);
+    const u64 id;
+    INSERT_PADDING_WORDS(2);
     std::array<u8, 8> dispdrv = {'d', 'i', 's', 'p', 'd', 'r', 'v', '\0'};
     INSERT_PADDING_WORDS(2);
 };
@@ -608,7 +608,9 @@ private:
             return;
         }
 
-        const auto parcel = android::OutputParcel{NativeWindow{*buffer_queue_id}};
+        android::OutputParcel parcel;
+        parcel.WriteInterface(NativeWindow{*buffer_queue_id});
+
         const auto buffer_size = ctx.WriteBuffer(parcel.Serialize());
 
         IPC::ResponseBuilder rb{ctx, 4};
@@ -654,7 +656,9 @@ private:
             return;
         }
 
-        const auto parcel = android::OutputParcel{NativeWindow{*buffer_queue_id}};
+        android::OutputParcel parcel;
+        parcel.WriteInterface(NativeWindow{*buffer_queue_id});
+
         const auto buffer_size = ctx.WriteBuffer(parcel.Serialize());
 
         IPC::ResponseBuilder rb{ctx, 6};
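Note that widening id from u32 to u64 while dropping one padding word keeps the NativeWindow parcel the same size; only the field boundaries move (assuming INSERT_PADDING_WORDS(n) contributes 4*n bytes):

// Old: 4 (magic) + 4 (process_id) + 4 (id) + 12 (3 pad words) + 8 (dispdrv) + 8 (2 pad words) = 40 bytes
// New: 4 (magic) + 4 (process_id) + 8 (id) +  8 (2 pad words) + 8 (dispdrv) + 8 (2 pad words) = 40 bytes
// The u64 id begins at offset 8, so it also stays naturally aligned.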

View File

@@ -23,42 +23,94 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
     common_ranges.clear();
     inline_buffer_id = NULL_BUFFER_ID;
 
     if (!runtime.CanReportMemoryUsage()) {
         minimum_memory = DEFAULT_EXPECTED_MEMORY;
         critical_memory = DEFAULT_CRITICAL_MEMORY;
         return;
     }
 
     const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
-    const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
-    const s64 min_spacing_critical = device_memory - 1_GiB;
-    const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
-    const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
-    const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
-    minimum_memory = static_cast<u64>(
-        std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
-                 DEFAULT_EXPECTED_MEMORY));
-    critical_memory = static_cast<u64>(
-        std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
-                 DEFAULT_CRITICAL_MEMORY));
+    const u64 device_mem_per = device_memory / 100;
+    minimum_memory = device_mem_per * 25;
+    expected_memory = device_mem_per * 50;
+    critical_memory = device_mem_per * 80;
+    LOG_INFO(HW_GPU, "Buffer cache device memory limits: min {} expected {} critical {}",
+             minimum_memory, expected_memory, critical_memory);
 }
 
 template <class P>
 void BufferCache<P>::RunGarbageCollector() {
-    const bool aggressive_gc = total_used_memory >= critical_memory;
-    const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
-    int num_iterations = aggressive_gc ? 64 : 32;
-    const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
+    if (total_used_memory < minimum_memory) {
+        return;
+    }
+    bool is_expected = total_used_memory >= expected_memory;
+    bool is_critical = total_used_memory >= critical_memory;
+    const u64 ticks_to_destroy = is_critical ? 60ULL : is_expected ? 120ULL : 240ULL;
+    size_t num_iterations = is_critical ? 40 : (is_expected ? 20 : 10);
+    boost::container::small_vector<std::pair<BufferId, VideoCommon::BufferCopies>, 40> to_delete;
+    u64 total_size{0};
+    const auto clean_up = [&](BufferId buffer_id) {
         if (num_iterations == 0) {
             return true;
         }
         --num_iterations;
         auto& buffer = slot_buffers[buffer_id];
-        DownloadBufferMemory(buffer);
-        DeleteBuffer(buffer_id);
+        auto buffer_copies = FullDownloadCopies(buffer, buffer.CpuAddr(), buffer.SizeBytes());
+        total_size += buffer_copies.total_size;
+        to_delete.push_back({buffer_id, std::move(buffer_copies)});
         return false;
     };
     lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
+
+    if (total_size > 0) {
+        if constexpr (USE_MEMORY_MAPS) {
+            auto map = runtime.DownloadStagingBuffer(Common::AlignUp(total_size, 1024));
+            auto base_offset = map.offset;
+            for (auto& [buffer_id, buffer_copies] : to_delete) {
+                if (buffer_copies.total_size == 0) {
+                    continue;
+                }
+                for (auto& copy : buffer_copies.copies) {
+                    copy.dst_offset += map.offset;
+                }
+                auto& buffer = slot_buffers[buffer_id];
+                runtime.CopyBuffer(map.buffer, buffer, buffer_copies.copies);
+                map.offset += buffer_copies.total_size;
+            }
+            runtime.Finish();
+            for (auto& [buffer_id, buffer_copies] : to_delete) {
+                if (buffer_copies.total_size > 0) {
+                    auto& buffer = slot_buffers[buffer_id];
+                    for (const auto& copy : buffer_copies.copies) {
+                        const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+                        const u8* copy_mapped_memory =
+                            map.mapped_span.data() + copy.dst_offset - base_offset;
+                        cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
+                    }
+                }
+                DeleteBuffer(buffer_id);
+            }
+        } else {
+            for (auto& [buffer_id, buffer_copies] : to_delete) {
+                if (buffer_copies.total_size == 0) {
+                    continue;
+                }
+                const std::span<u8> immediate_buffer = ImmediateBuffer(buffer_copies.total_size);
+                auto& buffer = slot_buffers[buffer_id];
+                for (const BufferCopy& copy : buffer_copies.copies) {
+                    buffer.ImmediateDownload(copy.src_offset,
+                                             immediate_buffer.subspan(0, copy.size));
+                    const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+                    cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
+                }
+                DeleteBuffer(buffer_id);
+            }
+        }
+    } else {
+        for (auto& [buffer_id, buffer_copies] : to_delete) {
+            DeleteBuffer(buffer_id);
+        }
+    }
 }
template <class P>
@@ -77,12 +129,10 @@ void BufferCache<P>::TickFrame() {
     uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
 
     // If we can obtain the memory info, use it instead of the estimate.
-    if (runtime.CanReportMemoryUsage()) {
+    if (runtime.CanReportMemoryUsage() && frame_tick % 60 == 0) {
         total_used_memory = runtime.GetDeviceMemoryUsage();
     }
-    if (total_used_memory >= minimum_memory) {
-        RunGarbageCollector();
-    }
+    RunGarbageCollector();
     ++frame_tick;
     delayed_destruction_ring.Tick();
@@ -1556,17 +1606,13 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
 }
 
 template <class P>
-void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
-    DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
-}
-
-template <class P>
-void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) {
-    boost::container::small_vector<BufferCopy, 1> copies;
+VideoCommon::BufferCopies BufferCache<P>::FullDownloadCopies(Buffer& buffer, VAddr cpu_addr,
+                                                             u64 size, bool clear) {
+    boost::container::small_vector<BufferCopy, 16> copies;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
-    memory_tracker.ForEachDownloadRangeAndClear(
-        cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
+    memory_tracker.ForEachDownloadRange(
+        cpu_addr, size, clear, [&](u64 cpu_addr_out, u64 range_size) {
             const VAddr buffer_addr = buffer.CpuAddr();
             const auto add_download = [&](VAddr start, VAddr end) {
                 const u64 new_offset = start - buffer_addr;
@@ -1590,22 +1636,35 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
             ClearDownload(subtract_interval);
             common_ranges.subtract(subtract_interval);
         });
-    if (total_size_bytes == 0) {
+    return {total_size_bytes, largest_copy, std::move(copies)};
+}
+
+template <class P>
+void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
+    DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
+}
+
+template <class P>
+void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) {
+    auto buffer_copies = FullDownloadCopies(buffer, cpu_addr, size);
+    if (buffer_copies.total_size == 0) {
         return;
     }
     MICROPROFILE_SCOPE(GPU_DownloadMemory);
     if constexpr (USE_MEMORY_MAPS) {
-        auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+        auto download_staging = runtime.DownloadStagingBuffer(buffer_copies.total_size);
         const u8* const mapped_memory = download_staging.mapped_span.data();
-        const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
-        for (BufferCopy& copy : copies) {
+        const std::span<BufferCopy> copies_span(buffer_copies.copies.data(),
+                                                buffer_copies.copies.size());
+        for (BufferCopy& copy : buffer_copies.copies) {
             // Modify copies to have the staging offset in mind
             copy.dst_offset += download_staging.offset;
         }
         runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
         runtime.Finish();
-        for (const BufferCopy& copy : copies) {
+        for (const BufferCopy& copy : buffer_copies.copies) {
             const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
             // Undo the modified offset
             const u64 dst_offset = copy.dst_offset - download_staging.offset;
@@ -1613,8 +1672,8 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
             cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
         }
     } else {
-        const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
-        for (const BufferCopy& copy : copies) {
+        const std::span<u8> immediate_buffer = ImmediateBuffer(buffer_copies.largest_copy);
+        for (const BufferCopy& copy : buffer_copies.copies) {
             buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
             const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
             cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
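With the hard-coded spacing heuristics gone, the three buffer cache limits are plain percentages of the VRAM the driver reports, and the collector tiers its eagerness between them. Worked numbers for a hypothetical 8 GiB card:

// device_mem_per  = 8 GiB / 100                    ~ 82 MiB
// minimum_memory  = 25% ~ 2.0 GiB -> below this, RunGarbageCollector() returns immediately
// expected_memory = 50% ~ 4.0 GiB -> above this, evict after 120 idle ticks, 20 items per pass
// critical_memory = 80% ~ 6.4 GiB -> above this, evict after 60 idle ticks, 40 items per pass
// Between minimum and expected, only buffers idle for 240 ticks are evicted, 10 per pass.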

View File

@@ -57,8 +57,6 @@ MICROPROFILE_DECLARE(GPU_PrepareBuffers);
 MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
 MICROPROFILE_DECLARE(GPU_DownloadMemory);
 
-using BufferId = SlotId;
-
 using VideoCore::Surface::PixelFormat;
 using namespace Common::Literals;
@@ -466,6 +464,9 @@ private:
     void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
 
+    [[nodiscard]] VideoCommon::BufferCopies FullDownloadCopies(Buffer& buffer, VAddr cpu_addr,
+                                                               u64 size, bool clear = true);
+
     void DownloadBufferMemory(Buffer& buffer_id);
     void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
@@ -569,6 +570,7 @@ private:
     u64 frame_tick = 0;
     u64 total_used_memory = 0;
     u64 minimum_memory = 0;
+    u64 expected_memory = 0;
     u64 critical_memory = 0;
     BufferId inline_buffer_id;

View File

@@ -1864,6 +1864,7 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
         num_layers = std::max(num_layers, color_buffer->range.extent.layers);
         images[num_images] = color_buffer->ImageHandle();
         image_ranges[num_images] = MakeSubresourceRange(color_buffer);
+        rt_map[index] = num_images;
         samples = color_buffer->Samples();
         ++num_images;
     }

View File

@@ -334,7 +334,7 @@ public:
     }
 
     [[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
-        return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
+        return (image_ranges.at(rt_map[index]).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
     }
 
     [[nodiscard]] bool HasAspectDepthBit() const noexcept {
@@ -354,6 +354,7 @@ private:
     u32 num_images = 0;
     std::array<VkImage, 9> images{};
     std::array<VkImageSubresourceRange, 9> image_ranges{};
+    std::array<size_t, NUM_RT> rt_map{};
     bool has_depth{};
     bool has_stencil{};
 };
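rt_map exists because images[] is packed tightly as attachments are visited: a missing color target (or a depth-only pass) shifts every later attachment down, so a render-target index can no longer index image_ranges directly. A worked illustration of the mismatch the lookup fixes, for a hypothetical framebuffer where RT0 is unbound and RT1 carries the only color buffer:

// While building the framebuffer: rt_map[1] = num_images (== 0), then ++num_images,
// so images[0]/image_ranges[0] describe RT1's image.
// Old: HasAspectColorBit(1) read image_ranges.at(1), a default-constructed entry
//      with aspectMask == 0 -> the color aspect was reported missing during clears.
// New: image_ranges.at(rt_map[1]) == image_ranges.at(0) -> RT1's actual aspect mask.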

View File

@@ -47,35 +47,31 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
     void(slot_image_views.insert(runtime, NullImageViewParams{}));
     void(slot_samplers.insert(runtime, sampler_descriptor));
 
-    if constexpr (HAS_DEVICE_MEMORY_INFO) {
-        const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
-        const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
-        const s64 min_spacing_critical = device_memory - 1_GiB;
-        const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
-        const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
-        const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
-        expected_memory = static_cast<u64>(
-            std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
-                     DEFAULT_EXPECTED_MEMORY));
-        critical_memory = static_cast<u64>(
-            std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
-                     DEFAULT_CRITICAL_MEMORY));
-        minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2);
-    } else {
-        expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
-        critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
-        minimum_memory = 0;
-    }
+    const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
+    const u64 device_mem_per = device_memory / 100;
+    minimum_memory = device_mem_per * 25;
+    expected_memory = device_mem_per * 50;
+    critical_memory = device_mem_per * 80;
+    LOG_INFO(HW_GPU, "Texture cache device memory limits: min {} expected {} critical {}",
+             minimum_memory, expected_memory, critical_memory);
 }
 
 template <class P>
 void TextureCache<P>::RunGarbageCollector() {
-    bool high_priority_mode = total_used_memory >= expected_memory;
-    bool aggressive_mode = total_used_memory >= critical_memory;
-    const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
-    size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
-    const auto clean_up = [this, &num_iterations, &high_priority_mode,
-                           &aggressive_mode](ImageId image_id) {
+    if (total_used_memory < minimum_memory) {
+        return;
+    }
+    bool is_expected = total_used_memory >= expected_memory;
+    bool is_critical = total_used_memory >= critical_memory;
+    const u64 ticks_to_destroy = is_critical ? 10ULL : is_expected ? 25ULL : 50ULL;
+    size_t num_iterations = is_critical ? 40 : (is_expected ? 20 : 10);
+    boost::container::small_vector<
+        std::tuple<ImageId, bool, boost::container::small_vector<BufferImageCopy, 16>>, 40>
+        to_delete;
+    u64 total_download_size{0};
+    u32 largest_download_size{0};
+    const auto clean_up = [&](ImageId image_id) {
         if (num_iterations == 0) {
             return true;
         }
@@ -86,51 +82,70 @@ void TextureCache<P>::RunGarbageCollector() {
             // used by the async decoder thread.
             return false;
         }
-        const bool must_download =
-            image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
-        if (!high_priority_mode &&
-            (must_download || True(image.flags & ImageFlagBits::CostlyLoad))) {
-            return false;
-        }
-        if (must_download) {
-            auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
-            const auto copies = FullDownloadCopies(image.info);
-            image.DownloadMemory(map, copies);
-            runtime.Finish();
-            SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
-                         swizzle_data_buffer);
-        }
-        if (True(image.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(image, image_id);
-        }
-        UnregisterImage(image_id);
-        DeleteImage(image_id, image.scale_tick > frame_tick + 5);
-        if (total_used_memory < critical_memory) {
-            if (aggressive_mode) {
-                // Sink the aggressiveness.
-                num_iterations >>= 2;
-                aggressive_mode = false;
-                return false;
-            }
-            if (high_priority_mode && total_used_memory < expected_memory) {
-                num_iterations >>= 1;
-                high_priority_mode = false;
-            }
-        }
+        const bool do_download = image.IsSafeDownload() &&
+                                 False(image.flags & ImageFlagBits::BadOverlap) &&
+                                 (False(image.flags & ImageFlagBits::CostlyLoad) || is_critical);
+        if (do_download) {
+            total_download_size += image.unswizzled_size_bytes;
+            largest_download_size = std::max(largest_download_size, image.unswizzled_size_bytes);
+        }
+        to_delete.push_back({image_id, do_download, {}});
         return false;
     };
     lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
+
+    if (total_download_size > 0) {
+        auto map = runtime.DownloadStagingBuffer(total_download_size);
+        for (auto& [image_id, do_download, copies] : to_delete) {
+            if (!do_download) {
+                continue;
+            }
+            Image& image = slot_images[image_id];
+            copies = FullDownloadCopies(image.info);
+            image.DownloadMemory(map, copies);
+            map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
+        }
+        runtime.Finish();
+        swizzle_data_buffer.resize_destructive(Common::AlignUp(largest_download_size, 1024));
+        u64 offset{0};
+        for (auto& [image_id, do_download, copies] : to_delete) {
+            Image& image = slot_images[image_id];
+            if (do_download) {
+                for (auto& copy : copies) {
+                    copy.buffer_offset += offset;
+                }
+                SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
+                             swizzle_data_buffer);
+                offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
+            }
+            if (True(image.flags & ImageFlagBits::Tracked)) {
+                UntrackImage(image, image_id);
+            }
+            UnregisterImage(image_id);
+            DeleteImage(image_id, image.scale_tick > frame_tick + 5);
+        }
+    } else {
+        for (auto& [image_id, do_download, copies] : to_delete) {
+            Image& image = slot_images[image_id];
+            if (True(image.flags & ImageFlagBits::Tracked)) {
+                UntrackImage(image, image_id);
+            }
+            UnregisterImage(image_id);
+            DeleteImage(image_id, image.scale_tick > frame_tick + 5);
+        }
+    }
 }
 template <class P>
 void TextureCache<P>::TickFrame() {
     // If we can obtain the memory info, use it instead of the estimate.
-    if (runtime.CanReportMemoryUsage()) {
+    if (runtime.CanReportMemoryUsage() && frame_tick % 60 == 0) {
         total_used_memory = runtime.GetDeviceMemoryUsage();
     }
-    if (total_used_memory > minimum_memory) {
-        RunGarbageCollector();
-    }
+    RunGarbageCollector();
     sentenced_images.Tick();
     sentenced_framebuffers.Tick();
     sentenced_image_view.Tick();
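Both caches now run garbage collection with the same batching shape: gather candidates first, then do one staging allocation and one GPU sync for the whole pass instead of one Finish() per evicted resource. The control flow, condensed from the code above:

// 1. Walk the LRU below (frame_tick - ticks_to_destroy), up to num_iterations entries,
//    recording (id, download copies) and accumulating the total download size.
// 2. If anything must be written back to guest memory:
//      map = runtime.DownloadStagingBuffer(total);  // one allocation for the pass
//      record every copy at a running offset into map
//      runtime.Finish();                            // one synchronization point
//      write each range back, then unregister and delete the resource;
//    otherwise delete the candidates outright.
// TickFrame() also refreshes total_used_memory from the driver only every 60th tick,
// presumably because querying it every frame is redundant between GC passes.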

View File

@@ -3,6 +3,8 @@
 #pragma once
 
+#include <boost/container/small_vector.hpp>
+
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/texture_cache/slot_vector.h"
@@ -14,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14;
 constexpr SlotId CORRUPT_ID{0xfffffffe};
 
+using BufferId = SlotId;
 using ImageId = SlotId;
 using ImageMapId = SlotId;
 using ImageViewId = SlotId;
@@ -146,6 +149,12 @@ struct BufferCopy {
     size_t size;
 };
 
+struct BufferCopies {
+    u64 total_size;
+    u64 largest_copy;
+    boost::container::small_vector<BufferCopy, 16> copies;
+};
+
 struct SwizzleParameters {
     Extent3D num_tiles;
     Extent3D block;

View File

@@ -914,7 +914,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
     }
 }
 
-std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
+boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) {
     const Extent3D size = info.size;
     const u32 bytes_per_block = BytesPerBlock(info.format);
     if (info.type == ImageType::Linear) {
@@ -942,7 +942,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
     u32 host_offset = 0;
-    std::vector<BufferImageCopy> copies(num_levels);
+    boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
     for (s32 level = 0; level < num_levels; ++level) {
         const Extent3D level_size = AdjustMipSize(size, level);
         const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);

View File

@@ -5,6 +5,7 @@
 #include <optional>
 #include <span>
 
+#include <boost/container/small_vector.hpp>
+
 #include "common/common_types.h"
 #include "common/scratch_buffer.h"
@@ -73,7 +74,8 @@ struct OverlapResult {
 void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
                   std::span<BufferImageCopy> copies);
 
-[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
+[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(
+    const ImageInfo& info);
 
 [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
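Switching the copy lists from std::vector to boost::container::small_vector&lt;BufferImageCopy, 16&gt; keeps the common case off the heap: MAX_MIP_LEVELS is 14 (see the types.h hunk above), so even a full mip chain fits in the inline capacity. A minimal sketch of the container's behavior, with int standing in for BufferImageCopy:

#include <boost/container/small_vector.hpp>

int main() {
    // 16 elements of inline storage live inside the object itself.
    boost::container::small_vector<int, 16> copies(14); // full mip chain: no heap allocation
    copies.push_back(0); // a 15th element: still inline; only a 17th would spill to the heap
    return static_cast<int>(copies.size());
}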

View File

@@ -1009,6 +1009,8 @@ void Device::CollectPhysicalMemoryInfo() {
         device_access_memory += mem_properties.memoryHeaps[element].size;
     }
     if (!is_integrated) {
+        const u64 reserve_memory = std::min<u64>(device_access_memory / 8, 1_GiB);
+        device_access_memory -= reserve_memory;
         return;
     }
     const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage);
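The reservation is the smaller of one eighth of total VRAM and 1 GiB, so small cards keep proportionally more usable memory while large cards give up a flat gigabyte. Worked numbers:

// 4 GiB card:  min(4 GiB / 8, 1 GiB) = 512 MiB reserved -> ~3.5 GiB reported to the caches
// 8 GiB card:  min(8 GiB / 8, 1 GiB) = 1 GiB reserved   -> ~7 GiB reported
// 16 GiB card: min(16 GiB / 8, 1 GiB) = 1 GiB reserved  -> ~15 GiB reported
// Integrated GPUs skip the reservation and fall through to the usage-based estimate below.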

View File

@@ -147,7 +147,7 @@ public:
     /// Returns whether this allocation is compatible with the arguments.
     [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
-        return (flags & property_flags) == property_flags && (type_mask & shifted_memory_type) != 0;
+        return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0;
     }
 
 private:
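The one-character fix flips the direction of the subset test: the old expression checked that the allocation's property_flags were a subset of the requested flags, which wrongly reported compatibility when a requested property was missing, while the new one checks that every requested flag is present in property_flags. A standalone demonstration; the two bit values happen to match Vulkan's VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT and VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, but the point is only the bit logic:

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t HOST_VISIBLE = 0x2;
    const std::uint32_t HOST_COHERENT = 0x4;
    const std::uint32_t property_flags = HOST_VISIBLE;        // what the allocation offers
    const std::uint32_t flags = HOST_VISIBLE | HOST_COHERENT; // what the caller requires
    std::printf("old: %d\n", (flags & property_flags) == property_flags); // 1: wrongly compatible
    std::printf("new: %d\n", (flags & property_flags) == flags);          // 0: correctly rejected
}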