Compare commits

1 commit

Author: Andrea Pappacoda
SHA1: 02ca46d394
Date: 2022-03-27 13:17:37 +02:00

build: use system SoundTouch when available

Checking for both a CMake config file and a pkg-config file is needed
because some distros only ship the latter.

Related to #6833 and #7044
20 changed files with 685 additions and 147 deletions

View File

@@ -69,7 +69,12 @@ if (YUZU_USE_EXTERNAL_SDL2)
endif()
# SoundTouch
add_subdirectory(soundtouch)
find_package(SoundTouch MODULE)
if (NOT SoundTouch_FOUND)
message(STATUS "SoundTouch not found, falling back to externals")
add_subdirectory(soundtouch)
add_library(SoundTouch::SoundTouch ALIAS SoundTouch)
endif()
# Cubeb
if(ENABLE_CUBEB)

View File

@@ -0,0 +1,63 @@
# SPDX-FileCopyrightText: 2022 Andrea Pappacoda <andrea@pappacoda.it>
#
# SPDX-License-Identifier: GPL-2.0-or-later
# This find module looks for SoundTouch via both a CMake config file and a
# pkg-config file. If the library is found, it checks whether it was built
# with 32-bit floating point samples; if not, it defines
# SOUNDTOUCH_INTEGER_SAMPLES, just like the bundled library does.
find_package(SoundTouch CONFIG)
if (SoundTouch_FOUND)
set(_st_real_name "SoundTouch::SoundTouch")
else()
find_package(PkgConfig QUIET)
if (PKG_CONFIG_FOUND)
pkg_search_module(SoundTouch IMPORTED_TARGET soundtouch)
if (SoundTouch_FOUND)
set_target_properties(PkgConfig::SoundTouch PROPERTIES IMPORTED_GLOBAL True)
add_library(SoundTouch::SoundTouch ALIAS PkgConfig::SoundTouch)
# Need to set this variable because CMake doesn't allow adding
# compile definitions to ALIAS targets
set(_st_real_name "PkgConfig::SoundTouch")
endif()
endif()
endif()
if (SoundTouch_FOUND)
find_path(_st_include_dir "soundtouch/soundtouch_config.h")
file(READ "${_st_include_dir}/soundtouch/soundtouch_config.h" _st_config_file)
# Check if the config file defines SOUNDTOUCH_FLOAT_SAMPLES
string(REGEX MATCH "#define[ ]+SOUNDTOUCH_FLOAT_SAMPLES[ ]+1" SoundTouch_FLOAT_SAMPLES ${_st_config_file})
if (NOT SoundTouch_FLOAT_SAMPLES)
target_compile_definitions(${_st_real_name} INTERFACE "SOUNDTOUCH_INTEGER_SAMPLES=1")
set(SoundTouch_INTEGER_SAMPLES True)
else()
# Check if SoundTouch supports SOUNDTOUCH_NO_CONFIG
file(READ "${_st_include_dir}/soundtouch/STTypes.h" _st_types_file)
string(FIND "${_st_types_file}" "SOUNDTOUCH_NO_CONFIG" SoundTouch_NO_CONFIG)
# string(FIND) returns -1 when the substring is absent, so any other value means it was found
if (NOT SoundTouch_NO_CONFIG EQUAL "-1")
target_compile_definitions(${_st_real_name} INTERFACE "SOUNDTOUCH_NO_CONFIG" "SOUNDTOUCH_INTEGER_SAMPLES=1")
set(SoundTouch_INTEGER_SAMPLES True)
endif()
unset(_st_types_file)
endif()
unset(_st_real_name)
unset(_st_include_dir)
unset(_st_config_file)
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(SoundTouch
REQUIRED_VARS
SoundTouch_FOUND
SoundTouch_INTEGER_SAMPLES
VERSION_VAR SoundTouch_VERSION
)
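
The integer/float question this module settles matters at the API boundary: SoundTouch compiles its public SAMPLETYPE to either short or float, and a mismatch between the application and the installed library corrupts audio. Below is a minimal, hypothetical C++ sketch (not part of this commit) showing how the definition propagates through the public SoundTouch API.

```cpp
// Minimal sketch: pushing a buffer through SoundTouch. SAMPLETYPE is 'short'
// when SOUNDTOUCH_INTEGER_SAMPLES is defined and 'float' when
// SOUNDTOUCH_FLOAT_SAMPLES is defined, so the definition chosen by the find
// module above must match the one the installed library was built with.
#include <soundtouch/SoundTouch.h>
#include <vector>

int main() {
    soundtouch::SoundTouch st;
    st.setSampleRate(48000);
    st.setChannels(2);
    st.setTempoChange(10.0); // speed up by 10%

    // One second of silent interleaved stereo; the element type follows the
    // SOUNDTOUCH_*_SAMPLES definition.
    std::vector<soundtouch::SAMPLETYPE> input(48000 * 2, 0);
    st.putSamples(input.data(), 48000);

    std::vector<soundtouch::SAMPLETYPE> output(48000 * 2);
    const auto received = st.receiveSamples(output.data(), 48000);
    (void)received; // number of frames actually produced
}
```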

View File

@@ -63,7 +63,7 @@ if (NOT MSVC)
endif()
target_link_libraries(audio_core PUBLIC common core)
target_link_libraries(audio_core PRIVATE SoundTouch)
target_link_libraries(audio_core PRIVATE SoundTouch::SoundTouch)
if(ENABLE_CUBEB)
target_link_libraries(audio_core PRIVATE cubeb)

View File

@@ -93,19 +93,17 @@ public:
static constexpr u64 ICACHE_LINE_SIZE = 64;
const u64 cache_line_start = value & ~(ICACHE_LINE_SIZE - 1);
parent.system.InvalidateCpuInstructionCacheRange(cache_line_start, ICACHE_LINE_SIZE);
parent.InvalidateCacheRange(cache_line_start, ICACHE_LINE_SIZE);
break;
}
case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoU:
parent.system.InvalidateCpuInstructionCaches();
parent.ClearInstructionCache();
break;
case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoUInnerSharable:
default:
LOG_DEBUG(Core_ARM, "Unprocessed instruction cache operation: {}", op);
break;
}
parent.jit->HaltExecution();
}
void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override {
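
For reference, the `value & ~(ICACHE_LINE_SIZE - 1)` expression in the hunk above rounds an address down to the start of its 64-byte instruction cache line so the whole line can be invalidated. A self-contained sketch of the arithmetic:

```cpp
// Standalone sketch of the cache-line rounding used above. With a
// power-of-two line size, clearing the low bits aligns the address down.
#include <cstdint>

constexpr std::uint64_t ICACHE_LINE_SIZE = 64; // 2^6 bytes

constexpr std::uint64_t CacheLineStart(std::uint64_t addr) {
    return addr & ~(ICACHE_LINE_SIZE - 1); // clear the low 6 bits
}

static_assert(CacheLineStart(0x1000) == 0x1000); // already aligned
static_assert(CacheLineStart(0x103F) == 0x1000); // last byte of the same line
static_assert(CacheLineStart(0x1040) == 0x1040); // start of the next line

int main() {}
```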

View File

@@ -21,7 +21,7 @@ Status BufferItemConsumer::AcquireBuffer(BufferItem* item, std::chrono::nanoseco
return Status::BadValue;
}
std::scoped_lock lock(mutex);
std::unique_lock lock(mutex);
if (const auto status = AcquireBufferLocked(item, present_when); status != Status::NoError) {
if (status != Status::NoBufferAvailable) {
@@ -40,7 +40,7 @@ Status BufferItemConsumer::AcquireBuffer(BufferItem* item, std::chrono::nanoseco
}
Status BufferItemConsumer::ReleaseBuffer(const BufferItem& item, Fence& release_fence) {
std::scoped_lock lock(mutex);
std::unique_lock lock(mutex);
if (const auto status = AddReleaseFenceLocked(item.buf, item.graphic_buffer, release_fence);
status != Status::NoError) {
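
The `std::scoped_lock` to `std::unique_lock` swaps in this file (and throughout the rest of this commit) follow from a `std::condition_variable` requirement: `wait()` must be able to unlock and relock the mutex, which only `std::unique_lock<std::mutex>` exposes. A minimal sketch of the pattern, independent of the yuzu code:

```cpp
// Why condition_variable needs unique_lock: wait() unlocks the mutex while
// blocked and relocks it before returning, so it needs a lock object it can
// manipulate; std::scoped_lock has no unlock()/lock() members.
#include <condition_variable>
#include <mutex>
#include <thread>

std::mutex mutex;
std::condition_variable cond;
bool ready = false;

void Consumer() {
    std::unique_lock lock{mutex};          // scoped_lock would not compile here
    cond.wait(lock, [] { return ready; }); // atomically unlock + sleep + relock
}

void Producer() {
    {
        std::scoped_lock lock{mutex};      // fine when no wait() is involved
        ready = true;
    }
    cond.notify_one();
}

int main() {
    std::thread consumer(Consumer);
    Producer();
    consumer.join();
}
```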

View File

@@ -0,0 +1,206 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/kernel/k_writable_event.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
namespace Service::NVFlinger {
BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_,
KernelHelpers::ServiceContext& service_context_)
: id(id_), layer_id(layer_id_), service_context{service_context_} {
buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent");
}
BufferQueue::~BufferQueue() {
service_context.CloseEvent(buffer_wait_event);
}
void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) {
ASSERT(slot < buffer_slots);
LOG_WARNING(Service, "Adding graphics buffer {}", slot);
{
std::unique_lock lock{free_buffers_mutex};
free_buffers.push_back(slot);
}
free_buffers_condition.notify_one();
buffers[slot] = {
.slot = slot,
.status = Buffer::Status::Free,
.igbp_buffer = igbp_buffer,
.transform = {},
.crop_rect = {},
.swap_interval = 0,
.multi_fence = {},
};
buffer_wait_event->GetWritableEvent().Signal();
}
std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
u32 height) {
// Wait for first request before trying to dequeue
{
std::unique_lock lock{free_buffers_mutex};
free_buffers_condition.wait(lock, [this] { return !free_buffers.empty() || !is_connect; });
}
if (!is_connect) {
// The buffer queue was disconnected while the thread was blocked; this is most
// likely due to emulation being stopped
return std::nullopt;
}
std::unique_lock lock{free_buffers_mutex};
auto f_itr = free_buffers.begin();
auto slot = buffers.size();
while (f_itr != free_buffers.end()) {
const Buffer& buffer = buffers[*f_itr];
if (buffer.status == Buffer::Status::Free && buffer.igbp_buffer.width == width &&
buffer.igbp_buffer.height == height) {
slot = *f_itr;
free_buffers.erase(f_itr);
break;
}
++f_itr;
}
if (slot == buffers.size()) {
return std::nullopt;
}
buffers[slot].status = Buffer::Status::Dequeued;
return {{buffers[slot].slot, &buffers[slot].multi_fence}};
}
const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
ASSERT(slot < buffers.size());
ASSERT(buffers[slot].status == Buffer::Status::Dequeued);
ASSERT(buffers[slot].slot == slot);
return buffers[slot].igbp_buffer;
}
void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect, u32 swap_interval,
Service::Nvidia::MultiFence& multi_fence) {
ASSERT(slot < buffers.size());
ASSERT(buffers[slot].status == Buffer::Status::Dequeued);
ASSERT(buffers[slot].slot == slot);
buffers[slot].status = Buffer::Status::Queued;
buffers[slot].transform = transform;
buffers[slot].crop_rect = crop_rect;
buffers[slot].swap_interval = swap_interval;
buffers[slot].multi_fence = multi_fence;
std::unique_lock lock{queue_sequence_mutex};
queue_sequence.push_back(slot);
}
void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& multi_fence) {
ASSERT(slot < buffers.size());
ASSERT(buffers[slot].status != Buffer::Status::Free);
ASSERT(buffers[slot].slot == slot);
buffers[slot].status = Buffer::Status::Free;
buffers[slot].multi_fence = multi_fence;
buffers[slot].swap_interval = 0;
{
std::unique_lock lock{free_buffers_mutex};
free_buffers.push_back(slot);
}
free_buffers_condition.notify_one();
buffer_wait_event->GetWritableEvent().Signal();
}
std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
std::unique_lock lock{queue_sequence_mutex};
std::size_t buffer_slot = buffers.size();
// Pop entries off the queue sequence until the first buffer that is still queued is found.
while (buffer_slot == buffers.size() && !queue_sequence.empty()) {
const auto slot = static_cast<std::size_t>(queue_sequence.front());
ASSERT(slot < buffers.size());
if (buffers[slot].status == Buffer::Status::Queued) {
ASSERT(buffers[slot].slot == slot);
buffer_slot = slot;
}
queue_sequence.pop_front();
}
if (buffer_slot == buffers.size()) {
return std::nullopt;
}
buffers[buffer_slot].status = Buffer::Status::Acquired;
return {{buffers[buffer_slot]}};
}
void BufferQueue::ReleaseBuffer(u32 slot) {
ASSERT(slot < buffers.size());
ASSERT(buffers[slot].status == Buffer::Status::Acquired);
ASSERT(buffers[slot].slot == slot);
buffers[slot].status = Buffer::Status::Free;
{
std::unique_lock lock{free_buffers_mutex};
free_buffers.push_back(slot);
}
free_buffers_condition.notify_one();
buffer_wait_event->GetWritableEvent().Signal();
}
void BufferQueue::Connect() {
std::unique_lock lock{queue_sequence_mutex};
queue_sequence.clear();
is_connect = true;
}
void BufferQueue::Disconnect() {
buffers.fill({});
{
std::unique_lock lock{queue_sequence_mutex};
queue_sequence.clear();
}
buffer_wait_event->GetWritableEvent().Signal();
is_connect = false;
free_buffers_condition.notify_one();
}
u32 BufferQueue::Query(QueryType type) {
LOG_WARNING(Service, "(STUBBED) called type={}", type);
switch (type) {
case QueryType::NativeWindowFormat:
return static_cast<u32>(PixelFormat::RGBA8888);
case QueryType::NativeWindowWidth:
case QueryType::NativeWindowHeight:
break;
case QueryType::NativeWindowMinUndequeuedBuffers:
return 0;
case QueryType::NativeWindowConsumerUsageBits:
return 0;
}
UNIMPLEMENTED_MSG("Unimplemented query type={}", type);
return 0;
}
Kernel::KWritableEvent& BufferQueue::GetWritableBufferWaitEvent() {
return buffer_wait_event->GetWritableEvent();
}
Kernel::KReadableEvent& BufferQueue::GetBufferWaitEvent() {
return buffer_wait_event->GetReadableEvent();
}
} // namespace Service::NVFlinger
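
Taken together, the methods in this reintroduced BufferQueue implement a small per-slot state machine. A condensed, hypothetical sketch (names simplified, not the actual class) of the transitions driven by the producer and consumer:

```cpp
// Condensed sketch of the slot lifecycle implemented above:
//   Free --DequeueBuffer--> Dequeued --QueueBuffer--> Queued
//   Queued --AcquireBuffer--> Acquired --ReleaseBuffer--> Free
#include <cassert>

enum class Status { Free, Dequeued, Queued, Acquired };

struct Slot {
    Status status = Status::Free;
};

int main() {
    Slot slot;
    // Producer side
    assert(slot.status == Status::Free);
    slot.status = Status::Dequeued; // DequeueBuffer: hand the slot to the app
    slot.status = Status::Queued;   // QueueBuffer: app submitted a frame
    // Consumer side
    slot.status = Status::Acquired; // AcquireBuffer: compositor takes the frame
    slot.status = Status::Free;     // ReleaseBuffer: slot returns to free_buffers
}
```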

View File

@@ -0,0 +1,154 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <condition_variable>
#include <list>
#include <mutex>
#include <optional>
#include "common/common_funcs.h"
#include "common/math_util.h"
#include "common/swap.h"
#include "core/hle/kernel/k_event.h"
#include "core/hle/kernel/k_readable_event.h"
#include "core/hle/service/nvdrv/nvdata.h"
namespace Kernel {
class KernelCore;
class KEvent;
class KReadableEvent;
class KWritableEvent;
} // namespace Kernel
namespace Service::KernelHelpers {
class ServiceContext;
} // namespace Service::KernelHelpers
namespace Service::NVFlinger {
constexpr u32 buffer_slots = 0x40;
struct IGBPBuffer {
u32_le magic;
u32_le width;
u32_le height;
u32_le stride;
u32_le format;
u32_le usage;
INSERT_PADDING_WORDS(1);
u32_le index;
INSERT_PADDING_WORDS(3);
u32_le gpu_buffer_id;
INSERT_PADDING_WORDS(6);
u32_le external_format;
INSERT_PADDING_WORDS(10);
u32_le nvmap_handle;
u32_le offset;
INSERT_PADDING_WORDS(60);
};
static_assert(sizeof(IGBPBuffer) == 0x16C, "IGBPBuffer has wrong size");
class BufferQueue final {
public:
enum class QueryType {
NativeWindowWidth = 0,
NativeWindowHeight = 1,
NativeWindowFormat = 2,
/// The minimum number of buffers that must remain un-dequeued after a buffer has been
/// queued
NativeWindowMinUndequeuedBuffers = 3,
/// The consumer gralloc usage bits currently set by the consumer
NativeWindowConsumerUsageBits = 10,
};
explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_,
KernelHelpers::ServiceContext& service_context_);
~BufferQueue();
enum class BufferTransformFlags : u32 {
/// No transform flags are set
Unset = 0x00,
/// Flip source image horizontally (around the vertical axis)
FlipH = 0x01,
/// Flip source image vertically (around the horizontal axis)
FlipV = 0x02,
/// Rotate source image 90 degrees clockwise
Rotate90 = 0x04,
/// Rotate source image 180 degrees
Rotate180 = 0x03,
/// Rotate source image 270 degrees clockwise
Rotate270 = 0x07,
};
enum class PixelFormat : u32 {
RGBA8888 = 1,
RGBX8888 = 2,
RGB888 = 3,
RGB565 = 4,
BGRA8888 = 5,
RGBA5551 = 6,
RGBA4444 = 7,
};
struct Buffer {
enum class Status { Free = 0, Queued = 1, Dequeued = 2, Acquired = 3 };
u32 slot;
Status status = Status::Free;
IGBPBuffer igbp_buffer;
BufferTransformFlags transform;
Common::Rectangle<int> crop_rect;
u32 swap_interval;
Service::Nvidia::MultiFence multi_fence;
};
void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> DequeueBuffer(u32 width,
u32 height);
const IGBPBuffer& RequestBuffer(u32 slot) const;
void QueueBuffer(u32 slot, BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect, u32 swap_interval,
Service::Nvidia::MultiFence& multi_fence);
void CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& multi_fence);
std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
void ReleaseBuffer(u32 slot);
void Connect();
void Disconnect();
u32 Query(QueryType type);
u32 GetId() const {
return id;
}
bool IsConnected() const {
return is_connect;
}
Kernel::KWritableEvent& GetWritableBufferWaitEvent();
Kernel::KReadableEvent& GetBufferWaitEvent();
private:
BufferQueue(const BufferQueue&) = delete;
u32 id{};
u64 layer_id{};
std::atomic_bool is_connect{};
std::list<u32> free_buffers;
std::array<Buffer, buffer_slots> buffers;
std::list<u32> queue_sequence;
Kernel::KEvent* buffer_wait_event{};
std::mutex free_buffers_mutex;
std::condition_variable free_buffers_condition;
std::mutex queue_sequence_mutex;
KernelHelpers::ServiceContext& service_context;
};
} // namespace Service::NVFlinger
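
The `static_assert(sizeof(IGBPBuffer) == 0x16C)` above can be checked by hand: the struct is 91 32-bit words (6 + 1 + 1 + 3 + 1 + 6 + 1 + 10 + 1 + 1 + 60), and 91 × 4 = 364 = 0x16C. A plain-uint32_t sketch of the same layout, assuming `INSERT_PADDING_WORDS(n)` expands to n words of padding:

```cpp
// Re-derivation of the IGBPBuffer size using plain arrays in place of the
// INSERT_PADDING_WORDS macro (assumed to expand to n padding words).
#include <cstdint>

struct IGBPBufferLayout {
    std::uint32_t magic;
    std::uint32_t width;
    std::uint32_t height;
    std::uint32_t stride;
    std::uint32_t format;
    std::uint32_t usage;
    std::uint32_t pad0[1];
    std::uint32_t index;
    std::uint32_t pad1[3];
    std::uint32_t gpu_buffer_id;
    std::uint32_t pad2[6];
    std::uint32_t external_format;
    std::uint32_t pad3[10];
    std::uint32_t nvmap_handle;
    std::uint32_t offset;
    std::uint32_t pad4[60];
};

static_assert(sizeof(IGBPBufferLayout) == 0x16C, "91 words * 4 bytes = 0x16C");

int main() {}
```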

View File

@@ -20,102 +20,122 @@ BufferQueueConsumer::~BufferQueueConsumer() = default;
Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer,
std::chrono::nanoseconds expected_present,
u64 max_frame_number) {
std::scoped_lock lock(core->mutex);
s32 num_dropped_buffers{};
// Check that the consumer doesn't currently have the maximum number of buffers acquired.
const s32 num_acquired_buffers{
static_cast<s32>(std::count_if(slots.begin(), slots.end(), [](const auto& slot) {
return slot.buffer_state == BufferState::Acquired;
}))};
std::shared_ptr<IProducerListener> listener;
{
std::unique_lock lock(core->mutex);
if (num_acquired_buffers >= core->max_acquired_buffer_count + 1) {
LOG_ERROR(Service_NVFlinger, "max acquired buffer count reached: {} (max {})",
num_acquired_buffers, core->max_acquired_buffer_count);
return Status::InvalidOperation;
}
// Check that the consumer doesn't currently have the maximum number of buffers acquired.
const s32 num_acquired_buffers{
static_cast<s32>(std::count_if(slots.begin(), slots.end(), [](const auto& slot) {
return slot.buffer_state == BufferState::Acquired;
}))};
// Check if the queue is empty.
if (core->queue.empty()) {
return Status::NoBufferAvailable;
}
if (num_acquired_buffers >= core->max_acquired_buffer_count + 1) {
LOG_ERROR(Service_NVFlinger, "max acquired buffer count reached: {} (max {})",
num_acquired_buffers, core->max_acquired_buffer_count);
return Status::InvalidOperation;
}
auto front(core->queue.begin());
// Check if the queue is empty.
if (core->queue.empty()) {
return Status::NoBufferAvailable;
}
// If expected_present is specified, we may not want to return a buffer yet.
if (expected_present.count() != 0) {
constexpr auto MAX_REASONABLE_NSEC = 1000000000LL; // 1 second
auto front(core->queue.begin());
// The expected_present argument indicates when the buffer is expected to be presented
// on-screen.
while (core->queue.size() > 1 && !core->queue[0].is_auto_timestamp) {
const auto& buffer_item{core->queue[1]};
// If expected_present is specified, we may not want to return a buffer yet.
if (expected_present.count() != 0) {
constexpr auto MAX_REASONABLE_NSEC = 1000000000LL; // 1 second
// If dropping entry[0] would leave us with a buffer that the consumer is not yet ready
// for, don't drop it.
if (max_frame_number && buffer_item.frame_number > max_frame_number) {
break;
// The expected_present argument indicates when the buffer is expected to be
// presented on-screen.
while (core->queue.size() > 1 && !core->queue[0].is_auto_timestamp) {
const auto& buffer_item{core->queue[1]};
// If dropping entry[0] would leave us with a buffer that the consumer is not yet
// ready for, don't drop it.
if (max_frame_number && buffer_item.frame_number > max_frame_number) {
break;
}
// If entry[1] is timely, drop entry[0] (and repeat).
const auto desired_present = buffer_item.timestamp;
if (desired_present < expected_present.count() - MAX_REASONABLE_NSEC ||
desired_present > expected_present.count()) {
// This buffer is set to display in the near future, or desired_present is
// garbage.
LOG_DEBUG(Service_NVFlinger, "nodrop desire={} expect={}", desired_present,
expected_present.count());
break;
}
LOG_DEBUG(Service_NVFlinger, "drop desire={} expect={} size={}", desired_present,
expected_present.count(), core->queue.size());
if (core->StillTracking(*front)) {
// Front buffer is still in mSlots, so mark the slot as free
slots[front->slot].buffer_state = BufferState::Free;
core->free_buffers.push_back(front->slot);
listener = core->connected_producer_listener;
++num_dropped_buffers;
}
core->queue.erase(front);
front = core->queue.begin();
}
// If entry[1] is timely, drop entry[0] (and repeat).
const auto desired_present = buffer_item.timestamp;
if (desired_present < expected_present.count() - MAX_REASONABLE_NSEC ||
desired_present > expected_present.count()) {
// This buffer is set to display in the near future, or desired_present is garbage.
LOG_DEBUG(Service_NVFlinger, "nodrop desire={} expect={}", desired_present,
// See if the front buffer is ready to be acquired.
const auto desired_present = front->timestamp;
const auto buffer_is_due =
desired_present <= expected_present.count() ||
desired_present > expected_present.count() + MAX_REASONABLE_NSEC;
const auto consumer_is_ready =
max_frame_number > 0 ? front->frame_number <= max_frame_number : true;
if (!buffer_is_due || !consumer_is_ready) {
LOG_DEBUG(Service_NVFlinger, "defer desire={} expect={}", desired_present,
expected_present.count());
break;
return Status::PresentLater;
}
LOG_DEBUG(Service_NVFlinger, "drop desire={} expect={} size={}", desired_present,
expected_present.count(), core->queue.size());
if (core->StillTracking(*front)) {
// Front buffer is still in mSlots, so mark the slot as free
slots[front->slot].buffer_state = BufferState::Free;
}
core->queue.erase(front);
front = core->queue.begin();
}
// See if the front buffer is ready to be acquired.
const auto desired_present = front->timestamp;
if (desired_present > expected_present.count() &&
desired_present < expected_present.count() + MAX_REASONABLE_NSEC) {
LOG_DEBUG(Service_NVFlinger, "defer desire={} expect={}", desired_present,
LOG_DEBUG(Service_NVFlinger, "accept desire={} expect={}", desired_present,
expected_present.count());
return Status::PresentLater;
}
LOG_DEBUG(Service_NVFlinger, "accept desire={} expect={}", desired_present,
expected_present.count());
const auto slot = front->slot;
*out_buffer = *front;
LOG_DEBUG(Service_NVFlinger, "acquiring slot={}", slot);
// If the front buffer is still being tracked, update its slot state
if (core->StillTracking(*front)) {
slots[slot].acquire_called = true;
slots[slot].needs_cleanup_on_release = false;
slots[slot].buffer_state = BufferState::Acquired;
slots[slot].fence = Fence::NoFence();
}
// If the buffer has previously been acquired by the consumer, set graphic_buffer to nullptr
// to avoid unnecessarily remapping this buffer on the consumer side.
if (out_buffer->acquire_called) {
out_buffer->graphic_buffer = nullptr;
}
core->queue.erase(front);
// We might have freed a slot while dropping old buffers, or the producer may be blocked
// waiting for the number of buffers in the queue to decrease.
core->SignalDequeueCondition();
}
const auto slot = front->slot;
*out_buffer = *front;
LOG_DEBUG(Service_NVFlinger, "acquiring slot={}", slot);
// If the front buffer is still being tracked, update its slot state
if (core->StillTracking(*front)) {
slots[slot].acquire_called = true;
slots[slot].needs_cleanup_on_release = false;
slots[slot].buffer_state = BufferState::Acquired;
slots[slot].fence = Fence::NoFence();
if (listener != nullptr) {
for (s32 i = 0; i < num_dropped_buffers; ++i) {
listener->OnBufferReleased();
}
}
// If the buffer has previously been acquired by the consumer, set graphic_buffer to nullptr to
// avoid unnecessarily remapping this buffer on the consumer side.
if (out_buffer->acquire_called) {
out_buffer->graphic_buffer = nullptr;
}
core->queue.erase(front);
// We might have freed a slot while dropping old buffers, or the producer may be blocked
// waiting for the number of buffers in the queue to decrease.
core->SignalDequeueCondition();
return Status::NoError;
}
@@ -127,7 +147,7 @@ Status BufferQueueConsumer::ReleaseBuffer(s32 slot, u64 frame_number, const Fenc
std::shared_ptr<IProducerListener> listener;
{
std::scoped_lock lock(core->mutex);
std::unique_lock lock(core->mutex);
// If the frame number has changed because the buffer has been reallocated, we can ignore
// this ReleaseBuffer for the old buffer.
@@ -150,6 +170,8 @@ Status BufferQueueConsumer::ReleaseBuffer(s32 slot, u64 frame_number, const Fenc
slots[slot].fence = release_fence;
slots[slot].buffer_state = BufferState::Free;
core->free_buffers.push_back(slot);
listener = core->connected_producer_listener;
LOG_DEBUG(Service_NVFlinger, "releasing slot {}", slot);
@@ -167,7 +189,7 @@ Status BufferQueueConsumer::ReleaseBuffer(s32 slot, u64 frame_number, const Fenc
return Status::BadValue;
}
core->SignalDequeueCondition();
core->dequeue_condition.notify_all();
}
// Call back without lock held
@@ -187,7 +209,7 @@ Status BufferQueueConsumer::Connect(std::shared_ptr<IConsumerListener> consumer_
LOG_DEBUG(Service_NVFlinger, "controlled_by_app={}", controlled_by_app);
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
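
The refactored `AcquireBuffer` above folds the old early-return conditions into two named predicates, `buffer_is_due` and `consumer_is_ready`. A standalone sketch of the timing test, using the same MAX_REASONABLE_NSEC constant:

```cpp
// Sketch of the buffer_is_due test from AcquireBuffer: a buffer is due if its
// desired presentation time has passed, or if the timestamp is so far in the
// future (over one second) that it is treated as garbage.
#include <cstdint>

constexpr std::int64_t MAX_REASONABLE_NSEC = 1000000000LL; // 1 second

constexpr bool BufferIsDue(std::int64_t desired_present, std::int64_t expected_present) {
    return desired_present <= expected_present ||
           desired_present > expected_present + MAX_REASONABLE_NSEC;
}

static_assert(BufferIsDue(100, 200));          // already due: present now
static_assert(!BufferIsDue(250, 200));         // near future: PresentLater
static_assert(BufferIsDue(3000000000LL, 200)); // bogus timestamp: present anyway

int main() {}
```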

View File

@@ -10,12 +10,16 @@
namespace Service::android {
BufferQueueCore::BufferQueueCore() = default;
BufferQueueCore::BufferQueueCore() : lock{mutex, std::defer_lock} {
for (s32 slot = 0; slot < BufferQueueDefs::NUM_BUFFER_SLOTS; ++slot) {
free_slots.insert(slot);
}
}
BufferQueueCore::~BufferQueueCore() = default;
void BufferQueueCore::NotifyShutdown() {
std::scoped_lock lock(mutex);
std::unique_lock lk(mutex);
is_shutting_down = true;
@@ -31,7 +35,7 @@ bool BufferQueueCore::WaitForDequeueCondition() {
return false;
}
dequeue_condition.wait(mutex);
dequeue_condition.wait(lock);
return true;
}
@@ -82,15 +86,26 @@ s32 BufferQueueCore::GetPreallocatedBufferCountLocked() const {
void BufferQueueCore::FreeBufferLocked(s32 slot) {
LOG_DEBUG(Service_NVFlinger, "slot {}", slot);
const auto had_buffer = slots[slot].graphic_buffer != nullptr;
slots[slot].graphic_buffer.reset();
if (slots[slot].buffer_state == BufferState::Acquired) {
slots[slot].needs_cleanup_on_release = true;
}
if (slots[slot].buffer_state != BufferState::Free) {
free_slots.insert(slot);
} else if (had_buffer) {
// If the slot was FREE, but we had a buffer, we need to move this slot from the free
// buffers list to the free slots list.
free_buffers.remove(slot);
free_slots.insert(slot);
}
slots[slot].buffer_state = BufferState::Free;
slots[slot].frame_number = UINT32_MAX;
slots[slot].acquire_called = false;
slots[slot].frame_number = 0;
slots[slot].fence = Fence::NoFence();
}
@@ -111,7 +126,8 @@ bool BufferQueueCore::StillTracking(const BufferItem& item) const {
void BufferQueueCore::WaitWhileAllocatingLocked() const {
while (is_allocating) {
is_allocating_condition.wait(mutex);
std::unique_lock lk(mutex);
is_allocating_condition.wait(lk);
}
}
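
The member-type change from `std::condition_variable_any` to `std::condition_variable` (here and in the headers below) is what forces the `wait(mutex)` to `wait(lock)` rewrites: `condition_variable_any` accepts any Lockable, including a raw `std::mutex`, while the plain `condition_variable` only accepts `std::unique_lock<std::mutex>` and is typically cheaper. A minimal contrast sketch:

```cpp
// condition_variable_any can block on anything Lockable (even a bare mutex,
// as the old code did); condition_variable requires unique_lock<mutex> but
// avoids the extra indirection.
#include <condition_variable>
#include <mutex>

std::mutex mutex;
std::condition_variable_any cv_any;
std::condition_variable cv;

void OldStyleWait() {
    mutex.lock();
    cv_any.wait(mutex); // legal: std::mutex itself satisfies Lockable
    mutex.unlock();     // real code would also loop on a predicate
}

void NewStyleWait() {
    std::unique_lock lock{mutex};
    cv.wait(lock);      // the only lock type condition_variable accepts
}

int main() {}
```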

View File

@@ -49,8 +49,24 @@ private:
bool StillTracking(const BufferItem& item) const;
void WaitWhileAllocatingLocked() const;
private:
class AutoLock final {
public:
AutoLock(std::shared_ptr<BufferQueueCore>& core_) : core{core_} {
core->lock.lock();
}
~AutoLock() {
core->lock.unlock();
}
private:
std::shared_ptr<BufferQueueCore>& core;
};
private:
mutable std::mutex mutex;
mutable std::unique_lock<std::mutex> lock;
bool is_abandoned{};
bool consumer_controlled_by_app{};
std::shared_ptr<IConsumerListener> consumer_listener;
@@ -59,8 +75,10 @@ private:
std::shared_ptr<IProducerListener> connected_producer_listener;
BufferQueueDefs::SlotsType slots{};
std::vector<BufferItem> queue;
std::set<s32> free_slots;
std::list<s32> free_buffers;
s32 override_max_buffer_count{};
mutable std::condition_variable_any dequeue_condition;
mutable std::condition_variable dequeue_condition;
const bool use_async_buffer{}; // This is always disabled on HOS
bool dequeue_buffer_cannot_block{};
PixelFormat default_buffer_format{PixelFormat::Rgba8888};
@@ -72,7 +90,7 @@ private:
u64 frame_counter{};
u32 transform_hint{};
bool is_allocating{};
mutable std::condition_variable_any is_allocating_condition;
mutable std::condition_variable is_allocating_condition;
bool allow_allocation{true};
u64 buffer_age{};
bool is_shutting_down{};

View File

@@ -38,7 +38,7 @@ BufferQueueProducer::~BufferQueueProducer() {
Status BufferQueueProducer::RequestBuffer(s32 slot, std::shared_ptr<GraphicBuffer>* buf) {
LOG_DEBUG(Service_NVFlinger, "slot {}", slot);
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
@@ -65,7 +65,7 @@ Status BufferQueueProducer::SetBufferCount(s32 buffer_count) {
std::shared_ptr<IConsumerListener> listener;
{
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
core->WaitWhileAllocatingLocked();
if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
@@ -156,14 +156,6 @@ Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found,
case BufferState::Acquired:
++acquired_count;
break;
case BufferState::Free:
// We return the oldest of the free buffers to avoid stalling the producer if
// possible, since the consumer may still have pending reads of in-flight buffers
if (*found == BufferQueueCore::INVALID_BUFFER_SLOT ||
slots[s].frame_number < slots[*found].frame_number) {
*found = s;
}
break;
default:
break;
}
@@ -191,12 +183,27 @@ Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found,
}
}
*found = BufferQueueCore::INVALID_BUFFER_SLOT;
// If we disconnect and reconnect quickly, we can be in a state where our slots are empty
// but we have many buffers in the queue. This can cause us to run out of memory if we
// outrun the consumer. Wait here if it looks like we have too many buffers queued up.
const bool too_many_buffers = core->queue.size() > static_cast<size_t>(max_buffer_count);
if (too_many_buffers) {
LOG_ERROR(Service_NVFlinger, "queue size is {}, waiting", core->queue.size());
} else {
if (!core->free_buffers.empty()) {
auto slot = core->free_buffers.begin();
*found = *slot;
core->free_buffers.erase(slot);
} else if (core->allow_allocation && !core->free_slots.empty()) {
auto slot = core->free_slots.begin();
// Only return free slots up to the max buffer count
if (*slot < max_buffer_count) {
*found = *slot;
core->free_slots.erase(slot);
}
}
}
// If no buffer is found, or if the queue has too many buffers outstanding, wait for a
@@ -233,7 +240,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
Status return_flags = Status::NoError;
bool attached_by_consumer = false;
{
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
core->WaitWhileAllocatingLocked();
if (format == PixelFormat::NoFormat) {
format = core->default_buffer_format;
@@ -310,13 +317,12 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
}
{
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
return Status::NoInit;
}
slots[*out_slot].frame_number = UINT32_MAX;
slots[*out_slot].graphic_buffer = graphic_buffer;
}
}
@@ -333,7 +339,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
Status BufferQueueProducer::DetachBuffer(s32 slot) {
LOG_DEBUG(Service_NVFlinger, "slot {}", slot);
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
return Status::NoInit;
@@ -368,7 +374,7 @@ Status BufferQueueProducer::DetachNextBuffer(std::shared_ptr<GraphicBuffer>* out
return Status::BadValue;
}
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
core->WaitWhileAllocatingLocked();
@@ -376,22 +382,13 @@ Status BufferQueueProducer::DetachNextBuffer(std::shared_ptr<GraphicBuffer>* out
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
return Status::NoInit;
}
// Find the oldest valid slot
int found = BufferQueueCore::INVALID_BUFFER_SLOT;
for (int s = 0; s < BufferQueueDefs::NUM_BUFFER_SLOTS; ++s) {
if (slots[s].buffer_state == BufferState::Free && slots[s].graphic_buffer != nullptr) {
if (found == BufferQueueCore::INVALID_BUFFER_SLOT ||
slots[s].frame_number < slots[found].frame_number) {
found = s;
}
}
}
if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
if (core->free_buffers.empty()) {
return Status::NoMemory;
}
const s32 found = core->free_buffers.front();
core->free_buffers.remove(found);
LOG_DEBUG(Service_NVFlinger, "Detached slot {}", found);
*out_buffer = slots[found].graphic_buffer;
@@ -412,7 +409,7 @@ Status BufferQueueProducer::AttachBuffer(s32* out_slot,
return Status::BadValue;
}
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
core->WaitWhileAllocatingLocked();
Status return_flags = Status::NoError;
@@ -472,7 +469,7 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
BufferItem item;
{
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
@@ -557,9 +554,7 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
// mark it as freed
if (core->StillTracking(*front)) {
slots[front->slot].buffer_state = BufferState::Free;
// Reset the frame number of the freed buffer so that it is the first in line to
// be dequeued again
slots[front->slot].frame_number = 0;
core->free_buffers.push_front(front->slot);
}
// Overwrite the droppable buffer with the incoming one
*front = item;
@@ -587,9 +582,10 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
// Call back without the main BufferQueue lock held, but with the callback lock held so we can
// ensure that callbacks occur in order
{
std::scoped_lock lock(callback_mutex);
std::unique_lock lock(callback_mutex);
while (callback_ticket != current_callback_ticket) {
callback_condition.wait(callback_mutex);
std::unique_lock<std::mutex> lk(callback_mutex);
callback_condition.wait(lk);
}
if (frameAvailableListener != nullptr) {
@@ -608,7 +604,7 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
void BufferQueueProducer::CancelBuffer(s32 slot, const Fence& fence) {
LOG_DEBUG(Service_NVFlinger, "slot {}", slot);
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
@@ -625,8 +621,8 @@ void BufferQueueProducer::CancelBuffer(s32 slot, const Fence& fence) {
return;
}
core->free_buffers.push_front(slot);
slots[slot].buffer_state = BufferState::Free;
slots[slot].frame_number = 0;
slots[slot].fence = fence;
core->SignalDequeueCondition();
@@ -634,7 +630,7 @@ void BufferQueueProducer::CancelBuffer(s32 slot, const Fence& fence) {
}
Status BufferQueueProducer::Query(NativeWindow what, s32* out_value) {
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
if (out_value == nullptr) {
LOG_ERROR(Service_NVFlinger, "outValue was nullptr");
@@ -691,7 +687,7 @@ Status BufferQueueProducer::Query(NativeWindow what, s32* out_value) {
Status BufferQueueProducer::Connect(const std::shared_ptr<IProducerListener>& listener,
NativeWindowApi api, bool producer_controlled_by_app,
QueueBufferOutput* output) {
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
LOG_DEBUG(Service_NVFlinger, "api = {} producer_controlled_by_app = {}", api,
producer_controlled_by_app);
@@ -749,7 +745,7 @@ Status BufferQueueProducer::Disconnect(NativeWindowApi api) {
std::shared_ptr<IConsumerListener> listener;
{
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
core->WaitWhileAllocatingLocked();
@@ -799,11 +795,10 @@ Status BufferQueueProducer::SetPreallocatedBuffer(s32 slot,
return Status::BadValue;
}
std::scoped_lock lock(core->mutex);
BufferQueueCore::AutoLock lock(core);
slots[slot] = {};
slots[slot].graphic_buffer = buffer;
slots[slot].frame_number = 0;
// Most games preallocate a buffer and pass a valid buffer here. However, it is possible for
// this to be called with an empty buffer; Naruto Ultimate Ninja Storm is one game that does this.
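
The rewritten `WaitForFreeSlotThenRelock` above replaces the frame-number scan with two explicit pools: `free_buffers` (slots that already carry a GraphicBuffer) and `free_slots` (empty slots that may be allocated). A hypothetical standalone sketch of that selection order:

```cpp
// Sketch of the new slot-selection order: prefer a slot that already has a
// buffer attached; otherwise allocate a fresh slot, but only below the
// current max buffer count. Returns -1 (INVALID_BUFFER_SLOT) on failure.
#include <list>
#include <set>

constexpr int INVALID_BUFFER_SLOT = -1;

int FindFreeSlot(std::list<int>& free_buffers, std::set<int>& free_slots,
                 bool allow_allocation, int max_buffer_count) {
    if (!free_buffers.empty()) {
        const int slot = free_buffers.front(); // reuse an attached buffer first
        free_buffers.pop_front();
        return slot;
    }
    if (allow_allocation && !free_slots.empty()) {
        const int slot = *free_slots.begin();  // lowest-numbered empty slot
        if (slot < max_buffer_count) {         // respect the buffer-count cap
            free_slots.erase(free_slots.begin());
            return slot;
        }
    }
    return INVALID_BUFFER_SLOT;
}

int main() {
    std::list<int> free_buffers{2};
    std::set<int> free_slots{0, 1};
    // The attached buffer in slot 2 wins over the empty slots 0 and 1.
    return FindFreeSlot(free_buffers, free_slots, true, 16) == 2 ? 0 : 1;
}
```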

View File

@@ -77,7 +77,7 @@ private:
std::mutex callback_mutex;
s32 next_callback_ticket{};
s32 current_callback_ticket{};
std::condition_variable_any callback_condition;
std::condition_variable callback_condition;
};
} // namespace Service::android

View File

@@ -18,7 +18,7 @@ ConsumerBase::ConsumerBase(std::unique_ptr<BufferQueueConsumer> consumer_)
: consumer{std::move(consumer_)} {}
ConsumerBase::~ConsumerBase() {
std::scoped_lock lock(mutex);
std::unique_lock lock(mutex);
ASSERT_MSG(is_abandoned, "consumer is not abandoned!");
}
@@ -36,17 +36,17 @@ void ConsumerBase::FreeBufferLocked(s32 slot_index) {
}
void ConsumerBase::OnFrameAvailable(const BufferItem& item) {
std::scoped_lock lock(mutex);
std::unique_lock lock(mutex);
LOG_DEBUG(Service_NVFlinger, "called");
}
void ConsumerBase::OnFrameReplaced(const BufferItem& item) {
std::scoped_lock lock(mutex);
std::unique_lock lock(mutex);
LOG_DEBUG(Service_NVFlinger, "called");
}
void ConsumerBase::OnBuffersReleased() {
std::scoped_lock lock(mutex);
std::unique_lock lock(mutex);
LOG_DEBUG(Service_NVFlinger, "called");
}

View File

@@ -322,7 +322,7 @@ struct Memory::Impl {
}
if (Settings::IsFastmemEnabled()) {
const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
}
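
The one-line change above tightens when fastmem pages stay readable. A truth-table sketch of the new predicate (the old one used `IsGPULevelHigh()` instead):

```cpp
// New read-protection rule: reads stay enabled unless the GPU accuracy level
// is Extreme *and* the page is cached.
constexpr bool IsReadEnable(bool gpu_level_extreme, bool cached) {
    return !gpu_level_extreme || !cached;
}

static_assert(IsReadEnable(false, false)); // normal accuracy, uncached
static_assert(IsReadEnable(false, true));  // normal accuracy, cached: reads ok
static_assert(IsReadEnable(true, false));  // extreme, uncached: reads ok
static_assert(!IsReadEnable(true, true));  // extreme + cached: trap reads too

int main() {}
```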

View File

@@ -352,7 +352,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
shader_cache.OnCPUWrite(addr, size);
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
texture_cache.CachedWriteMemory(addr, size);
}
{
std::scoped_lock lock{buffer_cache.mutex};
@@ -363,6 +363,10 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
void RasterizerOpenGL::SyncGuestHost() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
shader_cache.SyncGuestHost();
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.FlushCachedWrites();
}
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.FlushCachedWrites();

View File

@@ -408,7 +408,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
pipeline_cache.OnCPUWrite(addr, size);
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
texture_cache.CachedWriteMemory(addr, size);
}
{
std::scoped_lock lock{buffer_cache.mutex};
@@ -418,6 +418,10 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
void RasterizerVulkan::SyncGuestHost() {
pipeline_cache.SyncGuestHost();
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.FlushCachedWrites();
}
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.FlushCachedWrites();

View File

@@ -39,6 +39,9 @@ enum class ImageFlagBits : u32 {
Rescaled = 1 << 13,
CheckingRescalable = 1 << 14,
IsRescalable = 1 << 15,
// Cached CPU
CachedCpuModified = 1 << 16, ///< Contents have been modified from the CPU
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)

View File

@@ -437,6 +437,23 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
});
}
template <class P>
void TextureCache<P>::CachedWriteMemory(VAddr cpu_addr, size_t size) {
const VAddr new_cpu_addr = Common::AlignDown(cpu_addr, CPU_PAGE_SIZE);
const size_t new_size = Common::AlignUp(size + cpu_addr - new_cpu_addr, CPU_PAGE_SIZE);
ForEachImageInRegion(new_cpu_addr, new_size, [this](ImageId image_id, Image& image) {
if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
return;
}
image.flags |= ImageFlagBits::CachedCpuModified;
cached_cpu_invalidate.insert(image_id);
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
});
}
template <class P>
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
std::vector<ImageId> images;
@@ -494,6 +511,18 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
}
}
template <class P>
void TextureCache<P>::FlushCachedWrites() {
for (ImageId image_id : cached_cpu_invalidate) {
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
image.flags &= ~ImageFlagBits::CachedCpuModified;
image.flags |= ImageFlagBits::CpuModified;
}
}
cached_cpu_invalidate.clear();
}
template <class P>
void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
@@ -1560,6 +1589,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
template <class P>
void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
ASSERT(False(image.flags & ImageFlagBits::Tracked));
if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
return;
}
image.flags |= ImageFlagBits::Tracked;
if (False(image.flags & ImageFlagBits::Sparse)) {
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
@@ -1616,6 +1648,9 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory -= Common::AlignUp(tentative_size, 1024);
if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
cached_cpu_invalidate.erase(image_id);
}
const GPUVAddr gpu_addr = image.gpu_addr;
const auto alloc_it = image_allocs_table.find(gpu_addr);
if (alloc_it == image_allocs_table.end()) {
@@ -1782,7 +1817,11 @@ template <class P>
void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
Image& image = slot_images[image_id];
if (invalidate) {
image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
cached_cpu_invalidate.erase(image_id);
}
image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified |
ImageFlagBits::CachedCpuModified);
if (False(image.flags & ImageFlagBits::Tracked)) {
TrackImage(image, image_id);
}
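
The new `CachedWriteMemory`/`FlushCachedWrites` pair above defers CPU-write invalidation: a write only sets a CachedCpuModified flag and records the image, and the pending set is promoted to real CpuModified invalidations later (at `SyncGuestHost` time, per the rasterizer hunks earlier). A simplified sketch with hypothetical types:

```cpp
// Simplified model of the deferred CPU-write tracking added above. The real
// code keys the set by ImageId inside the texture cache; this sketch uses
// plain pointers.
#include <cstdint>
#include <unordered_set>

enum ImageFlags : std::uint32_t {
    CpuModified       = 1u << 0,  // must be reuploaded before the next GPU use
    CachedCpuModified = 1u << 16, // CPU wrote, invalidation deferred
};

struct Image {
    std::uint32_t flags = 0;
};

// CachedWriteMemory: cheap per-write path; just mark and remember the image.
void CachedWrite(Image& image, std::unordered_set<Image*>& pending) {
    if (image.flags & CachedCpuModified) {
        return; // already queued, nothing to do
    }
    image.flags |= CachedCpuModified;
    pending.insert(&image);
}

// FlushCachedWrites: promote every deferred write to a real invalidation.
void FlushCachedWrites(std::unordered_set<Image*>& pending) {
    for (Image* image : pending) {
        image->flags &= ~CachedCpuModified;
        image->flags |= CpuModified;
    }
    pending.clear();
}

int main() {
    Image image;
    std::unordered_set<Image*> pending;
    CachedWrite(image, pending);
    CachedWrite(image, pending); // the second write is a no-op
    FlushCachedWrites(pending);
    return (image.flags == CpuModified) ? 0 : 1;
}
```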

View File

@@ -8,6 +8,7 @@
#include <span>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <queue>
@@ -50,6 +51,9 @@ class TextureCache {
/// Address shift for caching images into a hash table
static constexpr u64 PAGE_BITS = 20;
static constexpr u64 CPU_PAGE_BITS = 12;
static constexpr u64 CPU_PAGE_SIZE = 1ULL << CPU_PAGE_BITS;
/// Enables debugging features in the texture cache
static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
/// Implement blits as copies between framebuffers
@@ -136,6 +140,9 @@ public:
/// Mark images in a range as modified from the CPU
void WriteMemory(VAddr cpu_addr, size_t size);
/// Mark images in a range as modified from the CPU, deferring the invalidation until FlushCachedWrites
void CachedWriteMemory(VAddr cpu_addr, size_t size);
/// Download contents of host images to guest memory in a region
void DownloadMemory(VAddr cpu_addr, size_t size);
@@ -145,6 +152,8 @@ public:
/// Remove images in a region
void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
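/// Apply the deferred CPU writes recorded by CachedWriteMemory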
void FlushCachedWrites();
/// Blit an image with the given parameters
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
@@ -366,6 +375,8 @@ private:
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
std::unordered_set<ImageId> cached_cpu_invalidate;
VAddr virtual_invalid_space{};
bool has_deleted_images = false;
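
For completeness, the CPU_PAGE_BITS/CPU_PAGE_SIZE constants added above drive the page rounding in `CachedWriteMemory`. A worked example of that arithmetic:

```cpp
// Worked example of the 4 KiB page rounding used by CachedWriteMemory: the
// start address is aligned down and the size expanded so the resulting range
// covers every touched page.
#include <cstdint>

constexpr std::uint64_t CPU_PAGE_BITS = 12;
constexpr std::uint64_t CPU_PAGE_SIZE = 1ULL << CPU_PAGE_BITS;

constexpr std::uint64_t AlignDown(std::uint64_t value) {
    return value & ~(CPU_PAGE_SIZE - 1);
}

constexpr std::uint64_t AlignUp(std::uint64_t value) {
    return (value + CPU_PAGE_SIZE - 1) & ~(CPU_PAGE_SIZE - 1);
}

// A 0x10-byte write at 0x12345FF8 straddles a page boundary:
constexpr std::uint64_t cpu_addr = 0x12345FF8;
constexpr std::uint64_t size = 0x10;
constexpr std::uint64_t new_cpu_addr = AlignDown(cpu_addr);
constexpr std::uint64_t new_size = AlignUp(size + cpu_addr - new_cpu_addr);

static_assert(new_cpu_addr == 0x12345000);
static_assert(new_size == 0x2000); // two full pages

int main() {}
```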