Compare commits


4 Commits

Author SHA1 Message Date
Fernando Sahmkow a8d4927e29 Corrections, documenting and fixes. 2019-02-16 16:52:24 -04:00
Fernando Sahmkow ecccfe0337 Use u128 on Clock Cycles calculation. 2019-02-15 22:57:16 -04:00
Fernando Sahmkow 3ea48e8ebe Implement 128 bits Unsigned Integer Multiplication and Division. 2019-02-15 22:55:31 -04:00
Fernando Sahmkow 5b7ec71fb7 Correct CNTPCT to use Clock Cycles instead of Cpu Cycles. 2019-02-15 22:55:29 -04:00
105 changed files with 660 additions and 2451 deletions

View File

@@ -7,7 +7,7 @@ yuzu is an experimental open-source emulator for the Nintendo Switch from the cr
It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes.
yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success.
yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics.
yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included.

View File

@@ -46,18 +46,16 @@ struct AudioRendererParameter {
u32_le sample_rate;
u32_le sample_count;
u32_le mix_buffer_count;
u32_le submix_count;
u32_le unknown_c;
u32_le voice_count;
u32_le sink_count;
u32_le effect_count;
u32_le performance_frame_count;
u8 is_voice_drop_enabled;
u8 unknown_21;
u8 unknown_22;
u8 execution_mode;
u32_le unknown_1c;
u8 unknown_20;
INSERT_PADDING_BYTES(3);
u32_le splitter_count;
u32_le num_splitter_send_channels;
u32_le unknown_30;
u32_le unknown_2c;
INSERT_PADDING_WORDS(1);
u32_le revision;
};
static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");

View File

@@ -21,7 +21,7 @@ public:
Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {}
/// Returns the raw audio data for the buffer
std::vector<s16>& GetSamples() {
std::vector<s16>& Samples() {
return samples;
}

View File

@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
}
}
state.yn1 = static_cast<s16>(yn1);
state.yn2 = static_cast<s16>(yn2);
state.yn1 = yn1;
state.yn2 = yn2;
return ret;
}

View File

@@ -12,10 +12,6 @@
#include "common/ring_buffer.h"
#include "core/settings.h"
#ifdef _MSC_VER
#include <objbase.h>
#endif
namespace AudioCore {
class CubebSinkStream final : public SinkStream {
@@ -50,7 +46,7 @@ public:
}
}
~CubebSinkStream() override {
~CubebSinkStream() {
if (!ctx) {
return;
}
@@ -79,11 +75,11 @@ public:
queue.Push(samples);
}
std::size_t SamplesInQueue(u32 channel_count) const override {
std::size_t SamplesInQueue(u32 num_channels) const override {
if (!ctx)
return 0;
return queue.Size() / channel_count;
return queue.Size() / num_channels;
}
void Flush() override {
@@ -102,7 +98,7 @@ private:
u32 num_channels{};
Common::RingBuffer<s16, 0x10000> queue;
std::array<s16, 2> last_frame{};
std::array<s16, 2> last_frame;
std::atomic<bool> should_flush{};
TimeStretcher time_stretch;
@@ -112,11 +108,6 @@ private:
};
CubebSink::CubebSink(std::string_view target_device_name) {
// Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
#ifdef _MSC_VER
com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
#endif
if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
return;
@@ -151,12 +142,6 @@ CubebSink::~CubebSink() {
}
cubeb_destroy(ctx);
#ifdef _MSC_VER
if (SUCCEEDED(com_init_result)) {
CoUninitialize();
}
#endif
}
SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,

View File

@@ -25,10 +25,6 @@ private:
cubeb* ctx{};
cubeb_devid output_device{};
std::vector<SinkStreamPtr> sink_streams;
#ifdef _MSC_VER
u32 com_init_result = 0;
#endif
};
std::vector<std::string> ListCubebSinkDevices();

View File

@@ -95,7 +95,7 @@ void Stream::PlayNextBuffer() {
active_buffer = queued_buffers.front();
queued_buffers.pop();
VolumeAdjustSamples(active_buffer->GetSamples());
VolumeAdjustSamples(active_buffer->Samples());
sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());

View File

@@ -113,6 +113,8 @@ add_library(common STATIC
threadsafe_queue.h
timer.cpp
timer.h
uint128.cpp
uint128.h
vector_math.h
web_result.h
)

View File

@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
/**
* Decode a color stored in RGBA8 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Common::Vec4<u8>
* @return Result color decoded as Math::Vec4<u8>
*/
inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) {
return {bytes[3], bytes[2], bytes[1], bytes[0]};
}
/**
* Decode a color stored in RGB8 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Common::Vec4<u8>
* @return Result color decoded as Math::Vec4<u8>
*/
inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) {
return {bytes[2], bytes[1], bytes[0], 255};
}
/**
* Decode a color stored in RG8 (aka HILO8) format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Common::Vec4<u8>
* @return Result color decoded as Math::Vec4<u8>
*/
inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
inline Math::Vec4<u8> DecodeRG8(const u8* bytes) {
return {bytes[1], bytes[0], 0, 255};
}
/**
* Decode a color stored in RGB565 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Common::Vec4<u8>
* @return Result color decoded as Math::Vec4<u8>
*/
inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
u16_le pixel;
std::memcpy(&pixel, bytes, sizeof(pixel));
return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
/**
* Decode a color stored in RGB5A1 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Common::Vec4<u8>
* @return Result color decoded as Math::Vec4<u8>
*/
inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
u16_le pixel;
std::memcpy(&pixel, bytes, sizeof(pixel));
return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
/**
* Decode a color stored in RGBA4 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Common::Vec4<u8>
* @return Result color decoded as Math::Vec4<u8>
*/
inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
u16_le pixel;
std::memcpy(&pixel, bytes, sizeof(pixel));
return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
/**
* Decode a depth value and a stencil value stored in D24S8 format
* @param bytes Pointer to encoded source values
* @return Resulting values stored as a Common::Vec2
* @return Resulting values stored as a Math::Vec2
*/
inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
}
@@ -149,7 +149,7 @@ inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
bytes[3] = color.r();
bytes[2] = color.g();
bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
bytes[2] = color.r();
bytes[1] = color.g();
bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
bytes[1] = color.r();
bytes[0] = color.g();
}
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
const u16_le data =
(Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
(Convert8To5(color.b()) << 1) | Convert8To1(color.a());
@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
(Convert8To4(color.b()) << 4) | Convert8To4(color.a());

View File

@@ -39,10 +39,10 @@ public:
Impl(Impl const&) = delete;
const Impl& operator=(Impl const&) = delete;
void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
const char* function, std::string message) {
message_queue.Push(
CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
void PushEntry(Entry e) {
std::lock_guard<std::mutex> lock(message_mutex);
message_queue.Push(std::move(e));
message_cv.notify_one();
}
void AddBackend(std::unique_ptr<Backend> backend) {
@@ -86,13 +86,15 @@ private:
}
};
while (true) {
entry = message_queue.PopWait();
if (entry.final_entry) {
{
std::unique_lock<std::mutex> lock(message_mutex);
message_cv.wait(lock, [&] { return !running || message_queue.Pop(entry); });
}
if (!running) {
break;
}
write_logs(entry);
}
// Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case
// where a system is repeatedly spamming logs even on close.
const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100;
@@ -104,36 +106,18 @@ private:
}
~Impl() {
Entry entry;
entry.final_entry = true;
message_queue.Push(entry);
running = false;
message_cv.notify_one();
backend_thread.join();
}
Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
const char* function, std::string message) const {
using std::chrono::duration_cast;
using std::chrono::steady_clock;
Entry entry;
entry.timestamp =
duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
entry.log_class = log_class;
entry.log_level = log_level;
entry.filename = Common::TrimSourcePath(filename);
entry.line_num = line_nr;
entry.function = function;
entry.message = std::move(message);
return entry;
}
std::mutex writing_mutex;
std::atomic_bool running{true};
std::mutex message_mutex, writing_mutex;
std::condition_variable message_cv;
std::thread backend_thread;
std::vector<std::unique_ptr<Backend>> backends;
Common::MPSCQueue<Log::Entry> message_queue;
Filter filter;
std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
};
void ConsoleBackend::Write(const Entry& entry) {
@@ -292,6 +276,25 @@ const char* GetLevelName(Level log_level) {
#undef LVL
}
Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
const char* function, std::string message) {
using std::chrono::duration_cast;
using std::chrono::steady_clock;
static steady_clock::time_point time_origin = steady_clock::now();
Entry entry;
entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
entry.log_class = log_class;
entry.log_level = log_level;
entry.filename = Common::TrimSourcePath(filename);
entry.line_num = line_nr;
entry.function = function;
entry.message = std::move(message);
return entry;
}
void SetGlobalFilter(const Filter& filter) {
Impl::Instance().SetGlobalFilter(filter);
}
@@ -316,7 +319,9 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
if (!filter.CheckMessage(log_class, log_level))
return;
instance.PushEntry(log_class, log_level, filename, line_num, function,
fmt::vformat(format, args));
Entry entry =
CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args));
instance.PushEntry(std::move(entry));
}
} // namespace Log
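
The hunk above swaps the dedicated MPSCQueue::PopWait call for an explicit mutex/condition-variable pair guarded by a running flag. Below is a minimal, self-contained sketch of that consumer-loop pattern using only standard-library types; LogEntry, BackendThread and WriteEntry are placeholder names, and unlike the real code (which pops from a lock-free MPSCQueue, keeps the mutex only for the condition variable, and uses an atomic running flag) this sketch keeps everything under one mutex for simplicity.

```cpp
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <utility>

struct LogEntry {
    std::string message;
};

class BackendThread {
public:
    BackendThread() : worker([this] { Run(); }) {}

    ~BackendThread() {
        {
            std::lock_guard<std::mutex> lock(mutex);
            running = false;
        }
        cv.notify_one(); // wake the worker so it can observe running == false
        worker.join();
    }

    void PushEntry(LogEntry entry) {
        {
            std::lock_guard<std::mutex> lock(mutex);
            queue.push(std::move(entry));
        }
        cv.notify_one();
    }

private:
    void Run() {
        while (true) {
            LogEntry entry;
            {
                std::unique_lock<std::mutex> lock(mutex);
                // Wake on shutdown or when an entry becomes available.
                cv.wait(lock, [this] { return !running || !queue.empty(); });
                if (!running) {
                    break;
                }
                entry = std::move(queue.front());
                queue.pop();
            }
            WriteEntry(entry);
        }
        // The real backend additionally drains up to MAX_LOGS_TO_WRITE
        // remaining entries before exiting.
    }

    static void WriteEntry(const LogEntry& entry) {
        std::cout << entry.message << '\n';
    }

    std::mutex mutex;
    std::condition_variable cv;
    std::queue<LogEntry> queue;
    bool running = true;
    std::thread worker;
};

int main() {
    BackendThread backend;
    backend.PushEntry({"hello"});
    backend.PushEntry({"world"});
}
```

Setting running to false while holding the mutex before notifying avoids the lost-wakeup window between the predicate check and the actual wait.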

View File

@@ -27,7 +27,6 @@ struct Entry {
unsigned int line_num;
std::string function;
std::string message;
bool final_entry = false;
Entry() = default;
Entry(Entry&& o) = default;
@@ -135,6 +134,10 @@ const char* GetLogClassName(Class log_class);
*/
const char* GetLevelName(Level log_level);
/// Creates a log entry by formatting the given source location, and message.
Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
const char* function, std::string message);
/**
* The global filter will prevent any messages from even being processed if they are filtered. Each
* backend can have a filter, but if the level is lower than the global filter, the backend will

View File

@@ -7,7 +7,7 @@
#include <cstdlib>
#include <type_traits>
namespace Common {
namespace MathUtil {
constexpr float PI = 3.14159265f;
@@ -41,4 +41,4 @@ struct Rectangle {
}
};
} // namespace Common
} // namespace MathUtil

View File

@@ -6,12 +6,12 @@
#include "common/vector_math.h"
namespace Common {
namespace Math {
template <typename T>
class Quaternion {
public:
Vec3<T> xyz;
Math::Vec3<T> xyz;
T w{};
Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
};
template <typename T>
auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) {
return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
}
inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) {
return {axis * std::sin(angle / 2), std::cos(angle / 2)};
}
} // namespace Common
} // namespace Math

View File

@@ -28,8 +28,8 @@
#include <cstring>
#include "common/common_types.h"
// GCC
#ifdef __GNUC__
// GCC 4.6+
#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +38,7 @@
#endif
// LLVM/clang
#elif defined(__clang__)
#elif __clang__
#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1

View File

@@ -8,7 +8,6 @@
// single reader, single writer queue
#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <mutex>
#include <utility>
@@ -46,7 +45,6 @@ public:
ElementPtr* new_ptr = new ElementPtr();
write_ptr->next.store(new_ptr, std::memory_order_release);
write_ptr = new_ptr;
cv.notify_one();
++size;
}
@@ -76,16 +74,6 @@ public:
return true;
}
T PopWait() {
if (Empty()) {
std::unique_lock<std::mutex> lock(cv_mutex);
cv.wait(lock, [this]() { return !Empty(); });
}
T t;
Pop(t);
return t;
}
// not thread-safe
void Clear() {
size.store(0);
@@ -113,8 +101,6 @@ private:
ElementPtr* write_ptr;
ElementPtr* read_ptr;
std::atomic_size_t size{0};
std::mutex cv_mutex;
std::condition_variable cv;
};
// a simple thread-safe,
@@ -149,10 +135,6 @@ public:
return spsc_queue.Pop(t);
}
T PopWait() {
return spsc_queue.PopWait();
}
// not thread-safe
void Clear() {
spsc_queue.Clear();

src/common/uint128.cpp Normal file (41 lines added)
View File

@@ -0,0 +1,41 @@
#ifdef _MSC_VER
#include <intrin.h>
#pragma intrinsic(_umul128)
#endif
#include <cstring>
#include "common/uint128.h"
namespace Common {
u128 Multiply64Into128(u64 a, u64 b) {
u128 result;
#ifdef _MSC_VER
result[0] = _umul128(a, b, &result[1]);
#else
unsigned __int128 tmp = a;
tmp *= b;
std::memcpy(&result, &tmp, sizeof(u128));
#endif
return result;
}
std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
u64 remainder = dividend[0] % divisor;
u64 accum = dividend[0] / divisor;
if (dividend[1] == 0)
return {accum, remainder};
// We ignore dividend[1] / divisor as that overflows
const u64 first_segment = (dividend[1] % divisor) << 32;
accum += (first_segment / divisor) << 32;
const u64 second_segment = (first_segment % divisor) << 32;
accum += (second_segment / divisor);
remainder += second_segment % divisor;
if (remainder >= divisor) {
accum++;
remainder -= divisor;
}
return {accum, remainder};
}
} // namespace Common

src/common/uint128.h Normal file (14 lines added)
View File

@@ -0,0 +1,14 @@
#include <utility>
#include "common/common_types.h"
namespace Common {
// This function multiplies 2 u64 values and produces a u128 value;
u128 Multiply64Into128(u64 a, u64 b);
// This function divides a u128 by a u32 value and produces two u64 values:
// the result of division and the remainder
std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
} // namespace Common
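
The new helpers are easy to exercise in isolation. The sketch below is illustrative only, not code from this change: the reference implementations lean on GCC/Clang's unsigned __int128 directly (the real file uses _umul128 on MSVC, and its division works segment by segment instead of through a native 128-bit divide), and the input values are arbitrary.

```cpp
#include <array>
#include <cstdint>
#include <cstdio>
#include <utility>

using u32 = std::uint32_t;
using u64 = std::uint64_t;
// yuzu's u128 is an array of two u64 words, low word first.
using u128 = std::array<u64, 2>;

// Reference formulations of the helpers added by this change (non-MSVC path).
u128 Multiply64Into128(u64 a, u64 b) {
    const unsigned __int128 product = static_cast<unsigned __int128>(a) * b;
    return {static_cast<u64>(product), static_cast<u64>(product >> 64)};
}

std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
    const unsigned __int128 value =
        (static_cast<unsigned __int128>(dividend[1]) << 64) | dividend[0];
    return {static_cast<u64>(value / divisor), static_cast<u64>(value % divisor)};
}

int main() {
    // 10^12 ticks * 19'200'000 exceeds 2^64, so the product needs all 128 bits.
    const u128 product = Multiply64Into128(1'000'000'000'000ULL, 19'200'000ULL);
    const auto [quotient, remainder] = Divide128On32(product, 1'019'215'872u);
    std::printf("quotient=%llu remainder=%llu\n",
                static_cast<unsigned long long>(quotient),
                static_cast<unsigned long long>(remainder));
}
```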

View File

@@ -33,7 +33,7 @@
#include <cmath>
#include <type_traits>
namespace Common {
namespace Math {
template <typename T>
class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
return MakeVec(x, yzw[0], yzw[1], yzw[2]);
}
} // namespace Common
} // namespace Math

View File

@@ -400,10 +400,6 @@ add_library(core STATIC
hle/service/time/time.h
hle/service/usb/usb.cpp
hle/service/usb/usb.h
hle/service/vi/display/vi_display.cpp
hle/service/vi/display/vi_display.h
hle/service/vi/layer/vi_layer.cpp
hle/service/vi/layer/vi_layer.h
hle/service/vi/vi.cpp
hle/service/vi/vi.h
hle/service/vi/vi_m.cpp

View File

@@ -12,6 +12,7 @@
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/svc.h"
@@ -119,7 +120,7 @@ public:
return std::max(parent.core_timing.GetDowncount(), 0);
}
u64 GetCNTPCT() override {
return parent.core_timing.GetTicks();
return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
}
ARM_Dynarmic& parent;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
config.tpidr_el0 = &cb->tpidr_el0;
config.dczid_el0 = 4;
config.ctr_el0 = 0x8444c004;
config.cntfrq_el0 = 19200000; // Value from fusee.
config.cntfrq_el0 = Timing::CNTFREQ;
// Unpredictable instructions
config.define_unpredictable_behaviour = true;

View File

@@ -128,7 +128,7 @@ struct System::Impl {
return ResultStatus::ErrorVideoCore;
}
gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer());
gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer());
cpu_core_manager.Initialize(system);
is_powered_on = true;

View File

@@ -11,7 +11,6 @@
#endif
#include "core/arm/exclusive_monitor.h"
#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/hle/kernel/scheduler.h"
@@ -50,9 +49,9 @@ bool CpuBarrier::Rendezvous() {
return false;
}
Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
std::size_t core_index)
: cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
CpuBarrier& cpu_barrier, std::size_t core_index)
: cpu_barrier{cpu_barrier}, core_timing{core_timing}, core_index{core_index} {
if (Settings::values.use_cpu_jit) {
#ifdef ARCHITECTURE_x86_64
arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
@@ -64,7 +63,7 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba
arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
}
scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface);
}
Cpu::~Cpu() = default;

View File

@@ -15,10 +15,6 @@ namespace Kernel {
class Scheduler;
}
namespace Core {
class System;
}
namespace Core::Timing {
class CoreTiming;
}
@@ -49,8 +45,8 @@ private:
class Cpu {
public:
Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
std::size_t core_index);
Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
CpuBarrier& cpu_barrier, std::size_t core_index);
~Cpu();
void RunLoop(bool tight_loop = true);

View File

@@ -7,6 +7,7 @@
#include <cinttypes>
#include <limits>
#include "common/logging/log.h"
#include "common/uint128.h"
namespace Core::Timing {
@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
}
u64 CpuCyclesToClockCycles(u64 ticks) {
const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
}
} // namespace Core::Timing
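
Conceptually, the new conversion scales the emulated CPU tick count from BASE_CLOCK_RATE (the ~1020 MHz CPU clock) to CNTFREQ (the 19.2 MHz counter frequency): clock_cycles = ticks * CNTFREQ / BASE_CLOCK_RATE. The 128-bit intermediate exists because ticks * 19'200'000 overflows 64 bits once ticks passes roughly 9.6 * 10^11, i.e. after only about a quarter of an hour of emulated uptime. A rough reference formulation (GCC/Clang only, using a native 128-bit integer instead of the Common helpers) might be:

```cpp
#include <cstdint>

constexpr std::uint64_t BASE_CLOCK_RATE = 1019215872; // ~1020 MHz Switch CPU clock
constexpr std::uint64_t CNTFREQ = 19200000;           // counter frequency, from fusee

std::uint64_t CpuCyclesToClockCyclesReference(std::uint64_t ticks) {
    const unsigned __int128 wide = static_cast<unsigned __int128>(ticks) * CNTFREQ;
    return static_cast<std::uint64_t>(wide / BASE_CLOCK_RATE);
}
```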

View File

@@ -11,6 +11,7 @@ namespace Core::Timing {
// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
// The exact value used is of course unverified.
constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
constexpr u64 CNTFREQ = 19200000; // Value from fusee.
inline s64 msToCycles(int ms) {
// since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
return cycles * 1000 / BASE_CLOCK_RATE;
}
u64 CpuCyclesToClockCycles(u64 ticks);
} // namespace Core::Timing

View File

@@ -27,7 +27,8 @@ void CpuCoreManager::Initialize(System& system) {
exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
for (std::size_t index = 0; index < cores.size(); ++index) {
cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
cores[index] =
std::make_unique<Cpu>(system.CoreTiming(), *exclusive_monitor, *barrier, index);
}
// Create threads for CPU cores 1-3, and build thread_to_cpu map

View File

@@ -398,8 +398,7 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
}
void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
std::ifstream file;
OpenFStream(file, filename, std::ios_base::in);
std::ifstream file(filename);
if (!file.is_open())
return;

View File

@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
if (offset + length > data.size())
data.resize(offset + length);
const auto write = std::min(length, data.size() - offset);
std::memcpy(data.data() + offset, data_, write);
std::memcpy(data.data(), data_, write);
return write;
}

View File

@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
}
std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) {
new_x = std::max(new_x, framebuffer_layout.screen.left);
new_x = std::min(new_x, framebuffer_layout.screen.right - 1);

View File

@@ -166,7 +166,7 @@ private:
/**
* Clip the provided coordinates to be inside the touchscreen area.
*/
std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y);
};
} // namespace Core::Frontend

View File

@@ -12,12 +12,12 @@ namespace Layout {
// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
template <class T>
static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
float screen_aspect_ratio) {
static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area,
float screen_aspect_ratio) {
float scale = std::min(static_cast<float>(window_area.GetWidth()),
window_area.GetHeight() / screen_aspect_ratio);
return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
static_cast<T>(std::round(scale * screen_aspect_ratio))};
return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
static_cast<T>(std::round(scale * screen_aspect_ratio))};
}
FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
ScreenUndocked::Width};
Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height};
MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio);
float window_aspect_ratio = static_cast<float>(height) / width;

View File

@@ -16,7 +16,7 @@ struct FramebufferLayout {
unsigned width{ScreenUndocked::Width};
unsigned height{ScreenUndocked::Height};
Common::Rectangle<unsigned> screen;
MathUtil::Rectangle<unsigned> screen;
/**
* Returns the ration of pixel size of the screen, compared to the native size of the undocked

View File

@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
* Orientation is determined by right-hand rule.
* Units: deg/sec
*/
using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;
using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>;
/**
* A touch device is an input device that returns a tuple of two floats and a bool. The floats are

View File

@@ -17,7 +17,8 @@
#include "core/hle/result.h"
#include "core/memory.h"
namespace Kernel::AddressArbiter {
namespace Kernel {
namespace AddressArbiter {
// Performs actual address waiting logic.
static ResultCode WaitForAddress(VAddr address, s64 timeout) {
@@ -175,4 +176,5 @@ ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
return WaitForAddress(address, timeout);
}
} // namespace Kernel::AddressArbiter
} // namespace AddressArbiter
} // namespace Kernel

View File

@@ -8,8 +8,9 @@
union ResultCode;
namespace Kernel::AddressArbiter {
namespace Kernel {
namespace AddressArbiter {
enum class ArbitrationType {
WaitIfLessThan = 0,
DecrementAndWaitIfLessThan = 1,
@@ -28,5 +29,6 @@ ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 valu
ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement);
ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
} // namespace AddressArbiter
} // namespace Kernel::AddressArbiter
} // namespace Kernel

View File

@@ -14,7 +14,6 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};

View File

@@ -14,47 +14,32 @@
namespace Kernel {
namespace {
constexpr u16 GetSlot(Handle handle) {
return static_cast<u16>(handle >> 15);
return handle >> 15;
}
constexpr u16 GetGeneration(Handle handle) {
return static_cast<u16>(handle & 0x7FFF);
return handle & 0x7FFF;
}
} // Anonymous namespace
HandleTable::HandleTable() {
next_generation = 1;
Clear();
}
HandleTable::~HandleTable() = default;
ResultCode HandleTable::SetSize(s32 handle_table_size) {
if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
return ERR_OUT_OF_MEMORY;
}
// Values less than or equal to zero indicate to use the maximum allowable
// size for the handle table in the actual kernel, so we ignore the given
// value in that case, since we assume this by default unless this function
// is called.
if (handle_table_size > 0) {
table_size = static_cast<u16>(handle_table_size);
}
return RESULT_SUCCESS;
}
ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
DEBUG_ASSERT(obj != nullptr);
const u16 slot = next_free_slot;
if (slot >= table_size) {
u16 slot = next_free_slot;
if (slot >= generations.size()) {
LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
return ERR_HANDLE_TABLE_FULL;
}
next_free_slot = generations[slot];
const u16 generation = next_generation++;
u16 generation = next_generation++;
// Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
// Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -79,11 +64,10 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
}
ResultCode HandleTable::Close(Handle handle) {
if (!IsValid(handle)) {
if (!IsValid(handle))
return ERR_INVALID_HANDLE;
}
const u16 slot = GetSlot(handle);
u16 slot = GetSlot(handle);
objects[slot] = nullptr;
@@ -93,10 +77,10 @@ ResultCode HandleTable::Close(Handle handle) {
}
bool HandleTable::IsValid(Handle handle) const {
const std::size_t slot = GetSlot(handle);
const u16 generation = GetGeneration(handle);
std::size_t slot = GetSlot(handle);
u16 generation = GetGeneration(handle);
return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation;
}
SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -113,7 +97,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
}
void HandleTable::Clear() {
for (u16 i = 0; i < table_size; ++i) {
for (u16 i = 0; i < MAX_COUNT; ++i) {
generations[i] = i + 1;
objects[i] = nullptr;
}
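
For context, the slot/generation split that GetSlot() and GetGeneration() decode packs a table index into the upper bits of a handle and a 15-bit generation counter into the lower bits, so a stale handle to a reused slot can be rejected. A small illustrative sketch follows; MakeHandle is a hypothetical helper here, while the real table assembles the value inside Create().

```cpp
#include <cstdint>

using Handle = std::uint32_t;

constexpr Handle MakeHandle(std::uint16_t slot, std::uint16_t generation) {
    // Generations run from 1 to 0x7FFF; zero marks an invalid handle.
    return (static_cast<Handle>(slot) << 15) | (generation & 0x7FFF);
}

constexpr std::uint16_t GetSlot(Handle handle) {
    return static_cast<std::uint16_t>(handle >> 15);
}

constexpr std::uint16_t GetGeneration(Handle handle) {
    return static_cast<std::uint16_t>(handle & 0x7FFF);
}

static_assert(GetSlot(MakeHandle(3, 42)) == 3, "slot round-trips");
static_assert(GetGeneration(MakeHandle(3, 42)) == 42, "generation round-trips");
```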

View File

@@ -49,20 +49,6 @@ public:
HandleTable();
~HandleTable();
/**
* Sets the number of handles that may be in use at one time
* for this handle table.
*
* @param handle_table_size The desired size to limit the handle table to.
*
* @returns an error code indicating if initialization was successful.
* If initialization was not successful, then ERR_OUT_OF_MEMORY
* will be returned.
*
* @pre handle_table_size must be within the range [0, 1024]
*/
ResultCode SetSize(s32 handle_table_size);
/**
* Allocates a handle for the given object.
* @return The created Handle or one of the following errors:
@@ -117,21 +103,14 @@ private:
*/
std::array<u16, MAX_COUNT> generations;
/**
* The limited size of the handle table. This can be specified by process
* capabilities in order to restrict the overall number of handles that
* can be created in a process instance
*/
u16 table_size = static_cast<u16>(MAX_COUNT);
/**
* Global counter of the number of created handles. Stored in `generations` when a handle is
* created, and wraps around to 1 when it hits 0x8000.
*/
u16 next_generation = 1;
u16 next_generation;
/// Head of the free slots linked list.
u16 next_free_slot = 0;
u16 next_free_slot;
};
} // namespace Kernel

View File

@@ -99,13 +99,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
vm_manager.Reset(metadata.GetAddressSpaceType());
const auto& caps = metadata.GetKernelCapabilities();
const auto capability_init_result =
capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
if (capability_init_result.IsError()) {
return capability_init_result;
}
return handle_table.SetSize(capabilities.GetHandleTableSize());
return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
}
void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {

View File

@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
interrupt_capabilities.set();
// Allow using the maximum possible amount of handles
handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);
handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT);
// Allow all debugging capabilities.
is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
return ERR_RESERVED_VALUE;
}
handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
handle_table_size = (flags >> 16) & 0x3FF;
return RESULT_SUCCESS;
}

View File

@@ -156,7 +156,7 @@ public:
}
/// Gets the number of total allowable handles for the process' handle table.
s32 GetHandleTableSize() const {
u32 GetHandleTableSize() const {
return handle_table_size;
}
@@ -252,7 +252,7 @@ private:
u64 core_mask = 0;
u64 priority_mask = 0;
s32 handle_table_size = 0;
u32 handle_table_size = 0;
u32 kernel_version = 0;
ProgramType program_type = ProgramType::SysModule;

View File

@@ -19,8 +19,7 @@ namespace Kernel {
std::mutex Scheduler::scheduler_mutex;
Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
: cpu_core{cpu_core}, system{system} {}
Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {}
Scheduler::~Scheduler() {
for (auto& thread : thread_list) {
@@ -62,7 +61,7 @@ Thread* Scheduler::PopNextReadyThread() {
void Scheduler::SwitchContext(Thread* new_thread) {
Thread* const previous_thread = GetCurrentThread();
Process* const previous_process = system.Kernel().CurrentProcess();
Process* const previous_process = Core::CurrentProcess();
UpdateLastContextSwitchTime(previous_thread, previous_process);
@@ -95,8 +94,8 @@ void Scheduler::SwitchContext(Thread* new_thread) {
auto* const thread_owner_process = current_thread->GetOwnerProcess();
if (previous_process != thread_owner_process) {
system.Kernel().MakeCurrentProcess(thread_owner_process);
SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process);
SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table);
}
cpu_core.LoadContext(new_thread->GetContext());
@@ -112,7 +111,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
const u64 prev_switch_ticks = last_context_switch_time;
const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
if (thread != nullptr) {
@@ -224,7 +223,8 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
// Take the first non-nullptr one
for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
const auto res =
system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);
Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread(
core, priority);
// If scheduler provides a suggested thread
if (res != nullptr) {

View File

@@ -13,8 +13,7 @@
namespace Core {
class ARM_Interface;
class System;
} // namespace Core
}
namespace Kernel {
@@ -22,7 +21,7 @@ class Process;
class Scheduler final {
public:
explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
explicit Scheduler(Core::ARM_Interface& cpu_core);
~Scheduler();
/// Returns whether there are any threads that are ready to run.
@@ -163,7 +162,6 @@ private:
Core::ARM_Interface& cpu_core;
u64 last_context_switch_time = 0;
Core::System& system;
static std::mutex scheduler_mutex;
};

View File

@@ -37,7 +37,7 @@ public:
{8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"},
{9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"},
{10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"},
{11, &IAudioRenderer::ExecuteAudioRendererRendering, "ExecuteAudioRendererRendering"},
{11, nullptr, "ExecuteAudioRendererRendering"},
};
// clang-format on
RegisterHandlers(functions);
@@ -138,17 +138,6 @@ private:
rb.Push(rendering_time_limit_percent);
}
void ExecuteAudioRendererRendering(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
// This service command currently only reports an unsupported operation
// error code, or aborts. Given that, we just always return an error
// code in this case.
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultCode{ErrorModule::Audio, 201});
}
Kernel::EventPair system_event;
std::unique_ptr<AudioCore::AudioRenderer> renderer;
u32 rendering_time_limit_percent = 100;
@@ -246,7 +235,7 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") {
{0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
{1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
{2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"},
{3, &AudRenU::OpenAudioRendererAuto, "OpenAudioRendererAuto"},
{3, nullptr, "OpenAudioRendererAuto"},
{4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"},
};
// clang-format on
@@ -259,7 +248,12 @@ AudRenU::~AudRenU() = default;
void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
OpenAudioRendererImpl(ctx);
IPC::RequestParser rp{ctx};
auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params));
}
void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
@@ -268,20 +262,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
buffer_sz += params.submix_count * 1024;
buffer_sz += 0x940 * (params.submix_count + 1);
buffer_sz += params.unknown_c * 1024;
buffer_sz += 0x940 * (params.unknown_c + 1);
buffer_sz += 0x3F0 * params.voice_count;
buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10);
buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
buffer_sz += Common::AlignUp(
(0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
(params.mix_buffer_count + 6),
0x40);
buffer_sz +=
Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) *
(params.mix_buffer_count + 6),
0x40);
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
const u32 count = params.submix_count + 1;
u32 count = params.unknown_c + 1;
u64 node_count = Common::AlignUp(count, 0x40);
const u64 node_state_buffer_sz =
u64 node_state_buffer_sz =
4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
u64 edge_matrix_buffer_sz = 0;
node_count = Common::AlignUp(count * count, 0x40);
@@ -295,19 +289,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
buffer_sz += 0xE0 * params.num_splitter_send_channels;
buffer_sz += 0xE0 * params.unknown_2c;
buffer_sz += 0x20 * params.splitter_count;
buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10);
}
buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
((params.voice_count * 256) | 0x40);
if (params.performance_frame_count >= 1) {
if (params.unknown_1c >= 1) {
output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
16 * params.voice_count + 16) +
0x658) *
(params.performance_frame_count + 1) +
(params.unknown_1c + 1) +
0xc0,
0x40) +
output_sz;
@@ -331,12 +325,6 @@ void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
rb.PushIpcInterface<Audio::IAudioDevice>();
}
void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
OpenAudioRendererImpl(ctx);
}
void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
@@ -347,15 +335,6 @@ void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& c
// based on the current revision
}
void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<IAudioRenderer>(params);
}
bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
switch (feature) {

View File

@@ -21,11 +21,8 @@ private:
void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
void GetAudioDeviceService(Kernel::HLERequestContext& ctx);
void OpenAudioRendererAuto(Kernel::HLERequestContext& ctx);
void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
enum class AudioFeatures : u32 {
Splitter,
};

View File

@@ -15,7 +15,7 @@ namespace Kernel {
class SharedMemory;
}
namespace Service::SM {
namespace SM {
class ServiceManager;
}

View File

@@ -23,7 +23,7 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect) {
const MathUtil::Rectangle<int>& crop_rect) {
VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
LOG_TRACE(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",

View File

@@ -25,7 +25,7 @@ public:
/// Performs a screen flip, drawing the buffer pointed to by the handle.
void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
NVFlinger::BufferQueue::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect);
const MathUtil::Rectangle<int>& crop_rect);
private:
std::shared_ptr<nvmap> nvmap_dev;

View File

@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
}
void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect) {
const MathUtil::Rectangle<int>& crop_rect) {
auto itr = std::find_if(queue.begin(), queue.end(),
[&](const Buffer& buffer) { return buffer.slot == slot; });
ASSERT(itr != queue.end());

View File

@@ -67,14 +67,14 @@ public:
Status status = Status::Free;
IGBPBuffer igbp_buffer;
BufferTransformFlags transform;
Common::Rectangle<int> crop_rect;
MathUtil::Rectangle<int> crop_rect;
};
void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
std::optional<u32> DequeueBuffer(u32 width, u32 height);
const IGBPBuffer& RequestBuffer(u32 slot) const;
void QueueBuffer(u32 slot, BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect);
const MathUtil::Rectangle<int>& crop_rect);
std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
void ReleaseBuffer(u32 slot);
u32 Query(QueryType type);

View File

@@ -14,12 +14,11 @@
#include "core/core_timing_util.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "core/hle/service/nvflinger/nvflinger.h"
#include "core/hle/service/vi/display/vi_display.h"
#include "core/hle/service/vi/layer/vi_layer.h"
#include "core/perf_stats.h"
#include "video_core/renderer_base.h"
@@ -29,12 +28,6 @@ constexpr std::size_t SCREEN_REFRESH_RATE = 60;
constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
displays.emplace_back(0, "Default");
displays.emplace_back(1, "External");
displays.emplace_back(2, "Edid");
displays.emplace_back(3, "Internal");
displays.emplace_back(4, "Null");
// Schedule the screen composition events
composition_event =
core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
@@ -59,14 +52,13 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
// TODO(Subv): Currently we only support the Default display.
ASSERT(name == "Default");
const auto itr =
std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.GetName() == name; });
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.name == name; });
if (itr == displays.end()) {
return {};
}
return itr->GetID();
return itr->id;
}
std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
@@ -76,10 +68,13 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
return {};
}
ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
const u64 layer_id = next_layer_id++;
const u32 buffer_queue_id = next_buffer_queue_id++;
buffer_queues.emplace_back(buffer_queue_id, layer_id);
display->CreateLayer(layer_id, buffer_queues.back());
auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
display->layers.emplace_back(layer_id, buffer_queue);
buffer_queues.emplace_back(std::move(buffer_queue));
return layer_id;
}
@@ -90,7 +85,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
return {};
}
return layer->GetBufferQueue().GetId();
return layer->buffer_queue->GetId();
}
Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
@@ -100,29 +95,20 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_i
return nullptr;
}
return display->GetVSyncEvent();
return display->vsync_event.readable;
}
BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[id](const auto& queue) { return queue.GetId() == id; });
[&](const auto& queue) { return queue->GetId() == id; });
ASSERT(itr != buffer_queues.end());
return *itr;
}
const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[id](const auto& queue) { return queue.GetId() == id; });
ASSERT(itr != buffer_queues.end());
return *itr;
}
VI::Display* NVFlinger::FindDisplay(u64 display_id) {
const auto itr =
std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.GetID() == display_id; });
Display* NVFlinger::FindDisplay(u64 display_id) {
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.id == display_id; });
if (itr == displays.end()) {
return nullptr;
@@ -131,10 +117,9 @@ VI::Display* NVFlinger::FindDisplay(u64 display_id) {
return &*itr;
}
const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
const auto itr =
std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.GetID() == display_id; });
const Display* NVFlinger::FindDisplay(u64 display_id) const {
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.id == display_id; });
if (itr == displays.end()) {
return nullptr;
@@ -143,41 +128,57 @@ const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
return &*itr;
}
VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
auto* const display = FindDisplay(display_id);
if (display == nullptr) {
return nullptr;
}
return display->FindLayer(layer_id);
const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
[&](const Layer& layer) { return layer.id == layer_id; });
if (itr == display->layers.end()) {
return nullptr;
}
return &*itr;
}
const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
const Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
const auto* const display = FindDisplay(display_id);
if (display == nullptr) {
return nullptr;
}
return display->FindLayer(layer_id);
const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
[&](const Layer& layer) { return layer.id == layer_id; });
if (itr == display->layers.end()) {
return nullptr;
}
return &*itr;
}
void NVFlinger::Compose() {
for (auto& display : displays) {
// Trigger vsync for this display at the end of drawing
SCOPE_EXIT({ display.SignalVSyncEvent(); });
SCOPE_EXIT({ display.vsync_event.writable->Signal(); });
// Don't do anything for displays without layers.
if (!display.HasLayers())
if (display.layers.empty())
continue;
// TODO(Subv): Support more than 1 layer.
VI::Layer& layer = display.GetLayer(0);
auto& buffer_queue = layer.GetBufferQueue();
ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported");
Layer& layer = display.layers[0];
auto& buffer_queue = layer.buffer_queue;
// Search for a queued buffer and acquire it
auto buffer = buffer_queue.AcquireBuffer();
auto buffer = buffer_queue->AcquireBuffer();
MicroProfileFlip();
@@ -202,8 +203,19 @@ void NVFlinger::Compose() {
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
buffer->get().transform, buffer->get().crop_rect);
buffer_queue.ReleaseBuffer(buffer->get().slot);
buffer_queue->ReleaseBuffer(buffer->get().slot);
}
}
Layer::Layer(u64 id, std::shared_ptr<BufferQueue> queue) : id(id), buffer_queue(std::move(queue)) {}
Layer::~Layer() = default;
Display::Display(u64 id, std::string name) : id(id), name(std::move(name)) {
auto& kernel = Core::System::GetInstance().Kernel();
vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
fmt::format("Display VSync Event {}", id));
}
Display::~Display() = default;
} // namespace Service::NVFlinger

View File

@@ -4,6 +4,7 @@
#pragma once
#include <array>
#include <memory>
#include <optional>
#include <string>
@@ -25,17 +26,31 @@ class WritableEvent;
namespace Service::Nvidia {
class Module;
} // namespace Service::Nvidia
namespace Service::VI {
class Display;
class Layer;
} // namespace Service::VI
}
namespace Service::NVFlinger {
class BufferQueue;
struct Layer {
Layer(u64 id, std::shared_ptr<BufferQueue> queue);
~Layer();
u64 id;
std::shared_ptr<BufferQueue> buffer_queue;
};
struct Display {
Display(u64 id, std::string name);
~Display();
u64 id;
std::string name;
std::vector<Layer> layers;
Kernel::EventPair vsync_event;
};
class NVFlinger final {
public:
explicit NVFlinger(Core::Timing::CoreTiming& core_timing);
@@ -65,10 +80,7 @@ public:
Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
/// Obtains a buffer queue identified by the ID.
BufferQueue& FindBufferQueue(u32 id);
/// Obtains a buffer queue identified by the ID.
const BufferQueue& FindBufferQueue(u32 id) const;
std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const;
/// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
/// finished.
@@ -76,21 +88,27 @@ public:
private:
/// Finds the display identified by the specified ID.
VI::Display* FindDisplay(u64 display_id);
Display* FindDisplay(u64 display_id);
/// Finds the display identified by the specified ID.
const VI::Display* FindDisplay(u64 display_id) const;
const Display* FindDisplay(u64 display_id) const;
/// Finds the layer identified by the specified ID in the desired display.
VI::Layer* FindLayer(u64 display_id, u64 layer_id);
Layer* FindLayer(u64 display_id, u64 layer_id);
/// Finds the layer identified by the specified ID in the desired display.
const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const;
const Layer* FindLayer(u64 display_id, u64 layer_id) const;
std::shared_ptr<Nvidia::Module> nvdrv;
std::vector<VI::Display> displays;
std::vector<BufferQueue> buffer_queues;
std::array<Display, 5> displays{{
{0, "Default"},
{1, "External"},
{2, "Edid"},
{3, "Internal"},
{4, "Null"},
}};
std::vector<std::shared_ptr<BufferQueue>> buffer_queues;
/// Id to use for the next layer that is created, this counter is shared among all displays.
u64 next_layer_id = 1;

View File

@@ -1,71 +0,0 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <utility>
#include <fmt/format.h>
#include "common/assert.h"
#include "core/core.h"
#include "core/hle/kernel/readable_event.h"
#include "core/hle/service/vi/display/vi_display.h"
#include "core/hle/service/vi/layer/vi_layer.h"
namespace Service::VI {
Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
auto& kernel = Core::System::GetInstance().Kernel();
vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
fmt::format("Display VSync Event {}", id));
}
Display::~Display() = default;
Layer& Display::GetLayer(std::size_t index) {
return layers.at(index);
}
const Layer& Display::GetLayer(std::size_t index) const {
return layers.at(index);
}
Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
return vsync_event.readable;
}
void Display::SignalVSyncEvent() {
vsync_event.writable->Signal();
}
void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
// TODO(Subv): Support more than 1 layer.
ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
layers.emplace_back(id, buffer_queue);
}
Layer* Display::FindLayer(u64 id) {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
}
const Layer* Display::FindLayer(u64 id) const {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
}
} // namespace Service::VI

View File

@@ -1,98 +0,0 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <vector>
#include "common/common_types.h"
#include "core/hle/kernel/writable_event.h"
namespace Service::NVFlinger {
class BufferQueue;
}
namespace Service::VI {
class Layer;
/// Represents a single display type
class Display {
public:
/// Constructs a display with a given unique ID and name.
///
/// @param id The unique ID for this display.
/// @param name The name for this display.
///
Display(u64 id, std::string name);
~Display();
Display(const Display&) = delete;
Display& operator=(const Display&) = delete;
Display(Display&&) = default;
Display& operator=(Display&&) = default;
/// Gets the unique ID assigned to this display.
u64 GetID() const {
return id;
}
/// Gets the name of this display
const std::string& GetName() const {
return name;
}
/// Whether or not this display has any layers added to it.
bool HasLayers() const {
return !layers.empty();
}
/// Gets a layer for this display based off an index.
Layer& GetLayer(std::size_t index);
/// Gets a layer for this display based off an index.
const Layer& GetLayer(std::size_t index) const;
/// Gets the readable vsync event.
Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
/// Signals the internal vsync event.
void SignalVSyncEvent();
/// Creates and adds a layer to this display with the given ID.
///
/// @param id The ID to assign to the created layer.
/// @param buffer_queue The buffer queue for the layer instance to use.
///
void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
/// Attempts to find a layer with the given ID.
///
/// @param id The layer ID.
///
/// @returns If found, the Layer instance with the given ID.
/// If not found, then nullptr is returned.
///
Layer* FindLayer(u64 id);
/// Attempts to find a layer with the given ID.
///
/// @param id The layer ID.
///
/// @returns If found, the Layer instance with the given ID.
/// If not found, then nullptr is returned.
///
const Layer* FindLayer(u64 id) const;
private:
u64 id;
std::string name;
std::vector<Layer> layers;
Kernel::EventPair vsync_event;
};
} // namespace Service::VI
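The Display interface above pairs a CreateLayer call, currently limited to one layer per display, with a FindLayer lookup by ID. A minimal standalone sketch of that create/find pattern follows; SimpleLayer and SimpleDisplay are illustrative stand-ins, not yuzu's own classes, which also carry a buffer queue reference and a vsync event.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

// Hypothetical stand-in for the Layer class.
struct SimpleLayer {
    std::uint64_t id;
};

class SimpleDisplay {
public:
    SimpleDisplay(std::uint64_t id, std::string name) : id{id}, name{std::move(name)} {}

    // Mirrors the "one layer per display" restriction noted in CreateLayer.
    void CreateLayer(std::uint64_t layer_id) {
        assert(layers.empty() && "Only one layer is supported per display at the moment");
        layers.push_back(SimpleLayer{layer_id});
    }

    // Linear search by ID, returning nullptr when the layer does not exist.
    SimpleLayer* FindLayer(std::uint64_t layer_id) {
        const auto itr = std::find_if(layers.begin(), layers.end(),
                                      [layer_id](const SimpleLayer& l) { return l.id == layer_id; });
        return itr == layers.end() ? nullptr : &*itr;
    }

private:
    std::uint64_t id;
    std::string name;
    std::vector<SimpleLayer> layers;
};

int main() {
    SimpleDisplay display{0, "Default"};
    display.CreateLayer(1);
    return display.FindLayer(1) != nullptr ? 0 : 1;
}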

View File

@@ -1,13 +0,0 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/hle/service/vi/layer/vi_layer.h"
namespace Service::VI {
Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}
Layer::~Layer() = default;
} // namespace Service::VI

View File

@@ -1,52 +0,0 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
namespace Service::NVFlinger {
class BufferQueue;
}
namespace Service::VI {
/// Represents a single display layer.
class Layer {
public:
/// Constructs a layer with a given ID and buffer queue.
///
/// @param id The ID to assign to this layer.
/// @param queue The buffer queue for this layer to use.
///
Layer(u64 id, NVFlinger::BufferQueue& queue);
~Layer();
Layer(const Layer&) = delete;
Layer& operator=(const Layer&) = delete;
Layer(Layer&&) = default;
Layer& operator=(Layer&&) = delete;
/// Gets the ID for this layer.
u64 GetID() const {
return id;
}
/// Gets a reference to the buffer queue this layer is using.
NVFlinger::BufferQueue& GetBufferQueue() {
return buffer_queue;
}
/// Gets a const reference to the buffer queue this layer is using.
const NVFlinger::BufferQueue& GetBufferQueue() const {
return buffer_queue;
}
private:
u64 id;
NVFlinger::BufferQueue& buffer_queue;
};
} // namespace Service::VI

View File

@@ -420,7 +420,7 @@ public:
u32_le fence_is_valid;
std::array<Fence, 2> fences;
Common::Rectangle<int> GetCropRect() const {
MathUtil::Rectangle<int> GetCropRect() const {
return {crop_left, crop_top, crop_right, crop_bottom};
}
};
@@ -525,7 +525,7 @@ private:
LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
static_cast<u32>(transaction), flags);
auto& buffer_queue = nv_flinger->FindBufferQueue(id);
auto buffer_queue = nv_flinger->FindBufferQueue(id);
if (transaction == TransactionId::Connect) {
IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -538,7 +538,7 @@ private:
} else if (transaction == TransactionId::SetPreallocatedBuffer) {
IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer);
IGBPSetPreallocatedBufferResponseParcel response{};
ctx.WriteBuffer(response.Serialize());
@@ -546,7 +546,7 @@ private:
IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
const u32 width{request.data.width};
const u32 height{request.data.height};
std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
if (slot) {
// Buffer is available
@@ -559,8 +559,8 @@ private:
[=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
Kernel::ThreadWakeupReason reason) {
// Repeat TransactParcel DequeueBuffer when a buffer is available
auto& buffer_queue = nv_flinger->FindBufferQueue(id);
std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
auto buffer_queue = nv_flinger->FindBufferQueue(id);
std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");
IGBPDequeueBufferResponseParcel response{*slot};
@@ -568,28 +568,28 @@ private:
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
},
buffer_queue.GetWritableBufferWaitEvent());
buffer_queue->GetWritableBufferWaitEvent());
}
} else if (transaction == TransactionId::RequestBuffer) {
IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
auto& buffer = buffer_queue.RequestBuffer(request.slot);
auto& buffer = buffer_queue->RequestBuffer(request.slot);
IGBPRequestBufferResponseParcel response{buffer};
ctx.WriteBuffer(response.Serialize());
} else if (transaction == TransactionId::QueueBuffer) {
IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
request.data.GetCropRect());
buffer_queue->QueueBuffer(request.data.slot, request.data.transform,
request.data.GetCropRect());
IGBPQueueBufferResponseParcel response{1280, 720};
ctx.WriteBuffer(response.Serialize());
} else if (transaction == TransactionId::Query) {
IGBPQueryRequestParcel request{ctx.ReadBuffer()};
const u32 value =
buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
u32 value =
buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
IGBPQueryResponseParcel response{value};
ctx.WriteBuffer(response.Serialize());
@@ -629,12 +629,12 @@ private:
LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
const auto& buffer_queue = nv_flinger->FindBufferQueue(id);
const auto buffer_queue = nv_flinger->FindBufferQueue(id);
// TODO(Subv): Find out what this actually is.
IPC::ResponseBuilder rb{ctx, 2, 1};
rb.Push(RESULT_SUCCESS);
rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent());
}
std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
@@ -752,7 +752,6 @@ public:
{1102, nullptr, "GetDisplayResolution"},
{2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
{2011, nullptr, "DestroyManagedLayer"},
{2012, nullptr, "CreateStrayLayer"},
{2050, nullptr, "CreateIndirectLayer"},
{2051, nullptr, "DestroyIndirectLayer"},
{2052, nullptr, "CreateIndirectProducerEndPoint"},

View File

@@ -71,20 +71,15 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
FlushMode::FlushAndInvalidate);
VAddr end = base + size;
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
base + page_table.pointers.size());
while (base != end) {
ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base);
std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
page_table.attributes[base] = type;
page_table.pointers[base] = memory;
if (memory == nullptr) {
std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
} else {
while (base != end) {
page_table.pointers[base] = memory;
base += 1;
base += 1;
if (memory != nullptr)
memory += PAGE_SIZE;
}
}
}
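The hunk above reworks MapPages around bulk std::fill calls for the per-page attribute and pointer arrays, falling back to a per-page loop only when each page needs a distinct host pointer. A self-contained sketch of that idea, with a made-up PageEntry layout and a 4 KiB page size standing in for the real page table types:

#include <algorithm>
#include <cstdint>
#include <vector>

constexpr std::size_t PAGE_SIZE = 0x1000;

enum class PageType : std::uint8_t { Unmapped, Memory };

struct PageTable {
    std::vector<std::uint8_t*> pointers;
    std::vector<PageType> attributes;
};

// Maps num_pages pages starting at page index `base` to `memory` (or unmaps
// them when memory == nullptr), mirroring the bulk-fill structure of MapPages.
void MapPages(PageTable& table, std::size_t base, std::size_t num_pages, std::uint8_t* memory,
              PageType type) {
    const std::size_t end = base + num_pages;
    // Attributes are uniform over the range, so they can always be bulk-filled.
    std::fill(table.attributes.begin() + base, table.attributes.begin() + end, type);
    if (memory == nullptr) {
        // Unmapped ranges share the same (null) pointer, so bulk-fill those too.
        std::fill(table.pointers.begin() + base, table.pointers.begin() + end, nullptr);
    } else {
        // Backed ranges need a distinct host pointer per page.
        for (std::size_t page = base; page != end; ++page) {
            table.pointers[page] = memory;
            memory += PAGE_SIZE;
        }
    }
}

int main() {
    PageTable table{std::vector<std::uint8_t*>(64), std::vector<PageType>(64)};
    static std::uint8_t backing[PAGE_SIZE * 4];
    MapPages(table, 8, 4, backing, PageType::Memory);
    return table.pointers[9] == backing + PAGE_SIZE ? 0 : 1;
}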

View File

@@ -32,12 +32,12 @@ public:
}
void BeginTilt(int x, int y) {
mouse_origin = Common::MakeVec(x, y);
mouse_origin = Math::MakeVec(x, y);
is_tilting = true;
}
void Tilt(int x, int y) {
auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
auto mouse_move = Math::MakeVec(x, y) - mouse_origin;
if (is_tilting) {
std::lock_guard<std::mutex> guard(tilt_mutex);
if (mouse_move.x == 0 && mouse_move.y == 0) {
@@ -45,7 +45,7 @@ public:
} else {
tilt_direction = mouse_move.Cast<float>();
tilt_angle =
std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f);
}
}
}
@@ -56,7 +56,7 @@ public:
is_tilting = false;
}
std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() {
std::lock_guard<std::mutex> guard(status_mutex);
return status;
}
@@ -66,17 +66,17 @@ private:
const std::chrono::steady_clock::duration update_duration;
const float sensitivity;
Common::Vec2<int> mouse_origin;
Math::Vec2<int> mouse_origin;
std::mutex tilt_mutex;
Common::Vec2<float> tilt_direction;
Math::Vec2<float> tilt_direction;
float tilt_angle = 0;
bool is_tilting = false;
Common::Event shutdown_event;
std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
std::tuple<Math::Vec3<float>, Math::Vec3<float>> status;
std::mutex status_mutex;
// Note: always keep the thread declaration at the end so that other objects are initialized
@@ -85,8 +85,8 @@ private:
void MotionEmuThread() {
auto update_time = std::chrono::steady_clock::now();
Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
Common::Quaternion<float> old_q;
Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0);
Math::Quaternion<float> old_q;
while (!shutdown_event.WaitUntil(update_time)) {
update_time += update_duration;
@@ -96,18 +96,18 @@ private:
std::lock_guard<std::mutex> guard(tilt_mutex);
// Find the quaternion describing the current 3DS tilt
q = Common::MakeQuaternion(
Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x),
tilt_angle);
}
auto inv_q = q.Inverse();
// Set the gravity vector in world space
auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);
auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f);
// Find the angular rate vector in world space
auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
angular_rate *= 1000 / update_millisecond / Common::PI * 180;
angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180;
// Transform the two vectors from world space to 3DS space
gravity = QuaternionRotate(inv_q, gravity);
@@ -131,7 +131,7 @@ public:
device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
}
std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override {
return device->GetStatus();
}
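The motion emulation above represents the tilt as an axis/angle quaternion and then rotates vectors such as gravity by it (or by its inverse, to move between world and device space). A compact sketch of that math with a minimal vector/quaternion type; this illustrates the approach only and is not yuzu's Math/Common code.

#include <cmath>
#include <cstdio>

struct Vec3 {
    float x, y, z;
};

static Vec3 Cross(const Vec3& a, const Vec3& b) {
    return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}

struct Quaternion {
    Vec3 xyz; // vector part
    float w;  // scalar part
};

// Axis/angle constructor; the axis is assumed to be normalized.
static Quaternion MakeQuaternion(const Vec3& axis, float angle) {
    const float s = std::sin(angle / 2.0f);
    return {{axis.x * s, axis.y * s, axis.z * s}, std::cos(angle / 2.0f)};
}

// Rotates v by the unit quaternion q: v' = v + 2*w*(u x v) + 2*u x (u x v).
static Vec3 QuaternionRotate(const Quaternion& q, const Vec3& v) {
    const Vec3 t = Cross(q.xyz, v);
    const Vec3 tt = Cross(q.xyz, t);
    return {v.x + 2.0f * (q.w * t.x + tt.x), v.y + 2.0f * (q.w * t.y + tt.y),
            v.z + 2.0f * (q.w * t.z + tt.z)};
}

int main() {
    // Tilt 90 degrees around the X axis and see where the gravity vector ends up.
    const Quaternion q = MakeQuaternion({1.0f, 0.0f, 0.0f}, 3.14159265f / 2.0f);
    const Vec3 gravity = QuaternionRotate(q, {0.0f, -1.0f, 0.0f});
    std::printf("rotated gravity: %.2f %.2f %.2f\n", gravity.x, gravity.y, gravity.z);
    return 0;
}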

View File

@@ -104,18 +104,8 @@ add_library(video_core STATIC
if (ENABLE_VULKAN)
target_sources(video_core PRIVATE
renderer_vulkan/declarations.h
renderer_vulkan/vk_buffer_cache.cpp
renderer_vulkan/vk_buffer_cache.h
renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h
renderer_vulkan/vk_memory_manager.cpp
renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h)
renderer_vulkan/vk_device.h)
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)

View File

@@ -33,36 +33,18 @@ void DmaPusher::DispatchCalls() {
}
bool DmaPusher::Step() {
if (!ib_enable || dma_pushbuffer.empty()) {
// pushbuffer empty and IB empty or nonexistent - nothing to do
return false;
}
if (dma_get != dma_put) {
// Push buffer non-empty, read a word
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
ASSERT_MSG(address, "Invalid GPU address");
const CommandList& command_list{dma_pushbuffer.front()};
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
GPUVAddr dma_get = command_list_header.addr;
GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
bool non_main = command_list_header.is_non_main;
const CommandHeader command_header{Memory::Read32(*address)};
if (dma_pushbuffer_subindex >= command_list.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
}
dma_get += sizeof(u32);
if (command_list_header.size == 0) {
return true;
}
// Push buffer non-empty, read a word
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
ASSERT_MSG(address, "Invalid GPU address");
command_headers.resize(command_list_header.size);
Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
for (const CommandHeader& command_header : command_headers) {
if (!non_main) {
dma_mget = dma_get;
}
// now, see if we're in the middle of a command
if (dma_state.length_pending) {
@@ -109,11 +91,22 @@ bool DmaPusher::Step() {
break;
}
}
}
} else if (ib_enable && !dma_pushbuffer.empty()) {
// Current pushbuffer empty, but we have more IB entries to read
const CommandList& command_list{dma_pushbuffer.front()};
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
dma_get = command_list_header.addr;
dma_put = dma_get + command_list_header.size * sizeof(u32);
non_main = command_list_header.is_non_main;
if (!non_main) {
// TODO (degasus): This is dead code, as dma_mget is never read.
dma_mget = dma_put;
if (dma_pushbuffer_subindex >= command_list.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
}
} else {
// Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
return {};
}
return true;
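Step() above drains one command list header per call: it pops the next entry from the pushbuffer queue, derives dma_get/dma_put from the header, and then reads that many words. A stripped-down sketch of the same control flow over an in-memory word buffer; GPU address translation, the non-main pushbuffer bookkeeping, and method dispatch are all omitted, and the class name is made up.

#include <cstdint>
#include <cstdio>
#include <queue>
#include <vector>

struct CommandListHeader {
    std::size_t addr; // index into guest "memory", not a real GPU address
    std::size_t size; // number of 32-bit words to read
};

using CommandList = std::vector<CommandListHeader>;

class SimplePusher {
public:
    explicit SimplePusher(std::vector<std::uint32_t> memory) : memory{std::move(memory)} {}

    void Push(CommandList list) {
        pushbuffer.push(std::move(list));
    }

    // Processes one command list header; returns false when there is nothing left to do.
    bool Step() {
        if (pushbuffer.empty()) {
            return false;
        }
        const CommandList& list = pushbuffer.front();
        const CommandListHeader header = list[subindex++];
        if (subindex >= list.size()) {
            // We've gone through the current list, remove it from the queue.
            pushbuffer.pop();
            subindex = 0;
        }
        // "Read" the words this header covers; the real pusher decodes them as methods.
        for (std::size_t i = 0; i < header.size; ++i) {
            std::printf("word %zu = 0x%08X\n", i, static_cast<unsigned>(memory[header.addr + i]));
        }
        return true;
    }

private:
    std::vector<std::uint32_t> memory;
    std::queue<CommandList> pushbuffer;
    std::size_t subindex = 0;
};

int main() {
    SimplePusher pusher{{0xDEADBEEF, 0xCAFEBABE, 0x00000000}};
    pusher.Push({{0, 2}, {2, 1}});
    while (pusher.Step()) {
    }
    return 0;
}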

View File

@@ -75,8 +75,6 @@ private:
GPU& gpu;
std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
@@ -91,8 +89,11 @@ private:
DmaState dma_state{};
bool dma_increment_once{};
GPUVAddr dma_put{}; ///< pushbuffer current end address
GPUVAddr dma_get{}; ///< pushbuffer current read address
GPUVAddr dma_mget{}; ///< main pushbuffer last read address
bool ib_enable{true}; ///< IB mode enabled
bool non_main{}; ///< non-main pushbuffer active
};
} // namespace Tegra

View File

@@ -44,10 +44,10 @@ void Fermi2D::HandleSurfaceCopy() {
const u32 src_blit_y2{
static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
regs.blit_dst_x + regs.blit_dst_width,
regs.blit_dst_y + regs.blit_dst_height};
const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
regs.blit_dst_x + regs.blit_dst_width,
regs.blit_dst_y + regs.blit_dst_height};
if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
UNIMPLEMENTED();

View File

@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
@@ -12,9 +11,9 @@
namespace Tegra::Engines {
KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {}
: memory_manager(memory_manager), rasterizer{rasterizer} {}
KeplerMemory::~KeplerMemory() = default;
@@ -51,7 +50,7 @@ void KeplerMemory::ProcessData(u32 data) {
rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
Memory::Write32(*dest_address, data);
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
state.write_offset++;
}

View File

@@ -5,16 +5,13 @@
#pragma once
#include <array>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -26,8 +23,7 @@ namespace Tegra::Engines {
class KeplerMemory final {
public:
KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
KeplerMemory(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
~KeplerMemory();
/// Write the value to the register identified by method.
@@ -80,7 +76,6 @@ public:
} state{};
private:
Core::System& system;
MemoryManager& memory_manager;
VideoCore::RasterizerInterface& rasterizer;

View File

@@ -19,10 +19,8 @@ namespace Tegra::Engines {
/// First register id that is actually a Macro call.
constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: memory_manager(memory_manager), system{system}, rasterizer{rasterizer},
macro_interpreter(*this) {
Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
: memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {
InitializeRegisterDefaults();
}
@@ -105,25 +103,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
}
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
auto debug_context = system.GetGPUDebugContext();
const u32 method = method_call.method;
auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
// It is an error to write to a register other than the current macro's ARG register before it
// has finished execution.
if (executing_macro != 0) {
ASSERT(method == executing_macro + 1);
ASSERT(method_call.method == executing_macro + 1);
}
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization.
if (method >= MacroRegistersStart) {
if (method_call.method >= MacroRegistersStart) {
// We're trying to execute a macro
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
ASSERT_MSG((method % 2) == 0,
ASSERT_MSG((method_call.method % 2) == 0,
"Can't start macro execution by writing to the ARGS register");
executing_macro = method;
executing_macro = method_call.method;
}
macro_params.push_back(method_call.argument);
@@ -135,62 +131,66 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
return;
}
ASSERT_MSG(method < Regs::NUM_REGS,
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
}
if (regs.reg_array[method] != method_call.argument) {
regs.reg_array[method] = method_call.argument;
if (regs.reg_array[method_call.method] != method_call.argument) {
regs.reg_array[method_call.method] = method_call.argument;
// Color buffers
constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
if (method >= first_rt_reg &&
method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
dirty_flags.color_buffer.set(rt_index);
if (method_call.method >= first_rt_reg &&
method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt;
dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index);
}
// Zeta buffer
constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
method == MAXWELL3D_REG_INDEX(zeta_width) ||
method == MAXWELL3D_REG_INDEX(zeta_height) ||
(method >= MAXWELL3D_REG_INDEX(zeta) &&
method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) ||
method_call.method == MAXWELL3D_REG_INDEX(zeta_width) ||
method_call.method == MAXWELL3D_REG_INDEX(zeta_height) ||
(method_call.method >= MAXWELL3D_REG_INDEX(zeta) &&
method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
dirty_flags.zeta_buffer = true;
}
// Shader
constexpr u32 shader_registers_count =
sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
dirty_flags.shaders = true;
}
// Vertex format
if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
method_call.method <
MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
dirty_flags.vertex_attrib_format = true;
}
// Vertex buffer
if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
} else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
} else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) &&
method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
dirty_flags.vertex_array |=
1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
} else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
dirty_flags.vertex_array |=
1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
} else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
dirty_flags.vertex_array |=
1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
}
}
switch (method) {
switch (method_call.method) {
case MAXWELL3D_REG_INDEX(macros.data): {
ProcessMacroUpload(method_call.argument);
break;
@@ -317,7 +317,7 @@ void Maxwell3D::ProcessQueryGet() {
LongQueryResult query_result{};
query_result.value = result;
// TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
query_result.timestamp = system.CoreTiming().GetTicks();
query_result.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
Memory::WriteBlock(*address, &query_result, sizeof(query_result));
}
dirty_flags.OnMemoryWrite();
@@ -334,7 +334,7 @@ void Maxwell3D::DrawArrays() {
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
auto debug_context = system.GetGPUDebugContext();
auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);

View File

@@ -5,10 +5,8 @@
#pragma once
#include <array>
#include <bitset>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
@@ -19,10 +17,6 @@
#include "video_core/memory_manager.h"
#include "video_core/textures/texture.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -34,8 +28,7 @@ namespace Tegra::Engines {
class Maxwell3D final {
public:
explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
~Maxwell3D() = default;
/// Register structure of the Maxwell3D engine.
@@ -505,7 +498,7 @@ public:
f32 translate_z;
INSERT_PADDING_WORDS(2);
Common::Rectangle<s32> GetRect() const {
MathUtil::Rectangle<s32> GetRect() const {
return {
GetX(), // left
GetY() + GetHeight(), // top
@@ -1096,18 +1089,19 @@ public:
MemoryManager& memory_manager;
struct DirtyFlags {
std::bitset<8> color_buffer{0xFF};
std::bitset<32> vertex_array{0xFFFFFFFF};
bool vertex_attrib_format = true;
u8 color_buffer = 0xFF;
bool zeta_buffer = true;
bool shaders = true;
bool vertex_attrib_format = true;
u32 vertex_array = 0xFFFFFFFF;
void OnMemoryWrite() {
color_buffer = 0xFF;
zeta_buffer = true;
shaders = true;
color_buffer.set();
vertex_array.set();
vertex_array = 0xFFFFFFFF;
}
};
@@ -1137,8 +1131,6 @@ public:
private:
void InitializeRegisterDefaults();
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
/// Start offsets of each macro in macro_memory
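The DirtyFlags change in this file swaps std::bitset tracking for plain integer masks. Both forms support the same operations; the sketch below shows marking, testing, and clearing a per-render-target dirty bit in each representation (the index 3 and the 8-bit width are just examples).

#include <bitset>
#include <cstdint>
#include <cstdio>

int main() {
    constexpr std::size_t index = 3;

    // std::bitset variant: named operations, size checked at compile time.
    std::bitset<8> color_buffer_bits{0xFF};
    color_buffer_bits.reset(index);                // clear once the buffer is rebound
    const bool dirty_a = color_buffer_bits[index]; // test
    color_buffer_bits.set();                       // mark everything dirty again

    // Raw mask variant: the same logic spelled out with shifts and masks.
    std::uint8_t color_buffer_mask = 0xFF;
    color_buffer_mask &= static_cast<std::uint8_t>(~(1u << index)); // clear
    const bool dirty_b = (color_buffer_mask & (1u << index)) != 0;  // test
    color_buffer_mask = 0xFF;                                       // mark everything dirty

    std::printf("bitset dirty: %d, mask dirty: %d\n", dirty_a, dirty_b);
    return 0;
}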

View File

@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
@@ -12,9 +11,8 @@
namespace Tegra::Engines {
MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {}
MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
: memory_manager(memory_manager), rasterizer{rasterizer} {}
void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -61,7 +59,7 @@ void MaxwellDMA::HandleCopy() {
}
// All copies here update the main memory, so mark all rasterizer states as invalid.
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D

View File

@@ -5,16 +5,13 @@
#pragma once
#include <array>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -23,8 +20,7 @@ namespace Tegra::Engines {
class MaxwellDMA final {
public:
explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
explicit MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
~MaxwellDMA() = default;
/// Write the value to the register identified by method.
@@ -141,8 +137,6 @@ public:
MemoryManager& memory_manager;
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
/// Performs the copy from the source buffer to the destination buffer as configured in the

View File

@@ -376,9 +376,9 @@ enum class R2pMode : u64 {
};
enum class IpaInterpMode : u64 {
Pass = 0,
Multiply = 1,
Constant = 2,
Linear = 0,
Perspective = 1,
Flat = 2,
Sc = 3,
};

View File

@@ -16,13 +16,6 @@ enum class OutputTopology : u32 {
TriangleStrip = 7,
};
enum class AttributeUse : u8 {
Unused = 0,
Constant = 1,
Perspective = 2,
ScreenLinear = 3,
};
// Documentation in:
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
struct Header {
@@ -91,15 +84,9 @@ struct Header {
} vtg;
struct {
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
union {
BitField<0, 2, AttributeUse> x;
BitField<2, 2, AttributeUse> y;
BitField<4, 2, AttributeUse> w;
BitField<6, 2, AttributeUse> z;
u8 raw;
} imap_generic_vector[32];
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
INSERT_PADDING_BYTES(2); // ImapColor
INSERT_PADDING_BYTES(2); // ImapSystemValuesC
INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -116,28 +103,6 @@ struct Header {
const u32 bit = render_target * 4 + component;
return omap.target & (1 << bit);
}
AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
return static_cast<AttributeUse>(
(imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
}
AttributeUse GetAttributeUse(u32 attribute) const {
AttributeUse result = AttributeUse::Unused;
for (u32 i = 0; i < 4; i++) {
const auto index = GetAttributeIndexUse(attribute, i);
if (index == AttributeUse::Unused) {
continue;
}
if (result == AttributeUse::Unused || result == index) {
result = index;
continue;
}
LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
if (index == AttributeUse::Perspective) {
result = index;
}
}
return result;
}
} ps;
};
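Each imap_generic_vector entry above packs four 2-bit AttributeUse values into a single byte, one per vector component. A standalone sketch of that decode, together with the "merge the four components into one mode" loop used by GetAttributeUse; only the conflict-resolution behaviour shown in the hunk is reproduced here.

#include <cstdint>
#include <cstdio>

enum class AttributeUse : std::uint8_t {
    Unused = 0,
    Constant = 1,
    Perspective = 2,
    ScreenLinear = 3,
};

// Extracts the 2-bit use field for component `index` (0..3) from a packed byte.
constexpr AttributeUse GetComponentUse(std::uint8_t raw, unsigned index) {
    return static_cast<AttributeUse>((raw >> (index * 2)) & 0x03);
}

// Collapses the four per-component uses into one mode, preferring Perspective
// when components disagree, mirroring the conflict handling in GetAttributeUse.
AttributeUse MergeUses(std::uint8_t raw) {
    AttributeUse result = AttributeUse::Unused;
    for (unsigned i = 0; i < 4; ++i) {
        const AttributeUse component = GetComponentUse(raw, i);
        if (component == AttributeUse::Unused) {
            continue;
        }
        if (result == AttributeUse::Unused || result == component) {
            result = component;
            continue;
        }
        if (component == AttributeUse::Perspective) {
            result = component;
        }
    }
    return result;
}

int main() {
    // Two of the four 2-bit fields are Perspective (0b10); the rest are Unused.
    const std::uint8_t raw = 0b10'00'00'10;
    std::printf("merged use: %u\n", static_cast<unsigned>(MergeUses(raw)));
    return 0;
}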

View File

@@ -28,14 +28,14 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
UNREACHABLE();
}
GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) {
GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
memory_manager = std::make_unique<Tegra::MemoryManager>();
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager);
}
GPU::~GPU() = default;

View File

@@ -6,15 +6,12 @@
#include <array>
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/dma_pusher.h"
#include "video_core/memory_manager.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -100,7 +97,7 @@ struct FramebufferConfig {
using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
TransformFlags transform_flags;
Common::Rectangle<int> crop_rect;
MathUtil::Rectangle<int> crop_rect;
};
namespace Engines {
@@ -121,7 +118,7 @@ enum class EngineID {
class GPU final {
public:
explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
explicit GPU(VideoCore::RasterizerInterface& rasterizer);
~GPU();
struct MethodCall {

View File

@@ -129,15 +129,6 @@ protected:
return ++modified_ticks;
}
/// Flushes the specified object, updating appropriate cache state as needed
void FlushObject(const T& object) {
if (!object->IsDirty()) {
return;
}
object->Flush();
object->MarkAsModified(false, *this);
}
private:
/// Returns a list of cached objects from the specified memory region, ordered by access time
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
@@ -163,6 +154,15 @@ private:
return objects;
}
/// Flushes the specified object, updating appropriate cache state as needed
void FlushObject(const T& object) {
if (!object->IsDirty()) {
return;
}
object->Flush();
object->MarkAsModified(false, *this);
}
using ObjectSet = std::set<T>;
using ObjectCache = std::unordered_map<VAddr, T>;
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;

View File

@@ -47,8 +47,8 @@ public:
/// Attempt to use a faster method to perform a surface copy
virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect) {
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect) {
return false;
}

View File

@@ -102,8 +102,8 @@ struct FramebufferCacheKey {
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
ScreenInfo& info)
: res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window},
screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
: res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
}
// Rebinding the VAO invalidates the vertex buffer bindings.
gpu.dirty_flags.vertex_array.set();
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
state.draw.vertex_array = vao_entry.handle;
return vao_entry.handle;
@@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
if (gpu.dirty_flags.vertex_array.none())
if (!gpu.dirty_flags.vertex_array)
return;
MICROPROFILE_SCOPE(OpenGL_VB);
// Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (!gpu.dirty_flags.vertex_array[index])
if (~gpu.dirty_flags.vertex_array & (1u << index))
continue;
const auto& vertex_array = regs.vertex_array[index];
@@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
}
}
gpu.dirty_flags.vertex_array.reset();
gpu.dirty_flags.vertex_array = 0;
}
DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
std::optional<std::size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
single_color_target};
if (fb_config_state == current_framebuffer_config_state &&
gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 &&
!gpu.dirty_flags.zeta_buffer) {
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
// single color targets). This is done because the guest registers may not change but the
// host framebuffer may contain different attachments
@@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() {
// Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
const bool invalidate = buffer_cache.Map(buffer_size);
bool invalidate = buffer_cache.Map(buffer_size);
if (invalidate) {
// As all cached buffers are invalidated, we need to recheck their state.
gpu.dirty_flags.vertex_array.set();
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
}
const GLuint vao = SetupVertexFormat();
@@ -738,11 +738,19 @@ void RasterizerOpenGL::DrawArrays() {
shader_program_manager->ApplyTo(state);
state.Apply();
res_cache.SignalPreDrawCall();
// Execute draw call
params.DispatchDraw();
res_cache.SignalPostDrawCall();
// Disable scissor test
state.viewports[0].scissor.enabled = false;
accelerate_draw = AccelDraw::Disabled;
// Unbind textures for potential future use as framebuffer attachments
for (auto& texture_unit : state.texture_units) {
texture_unit.Unbind();
}
state.Apply();
}
void RasterizerOpenGL::FlushAll() {}
@@ -771,8 +779,8 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect) {
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect) {
MICROPROFILE_SCOPE(OpenGL_Blits);
res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
return true;
@@ -1026,7 +1034,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
for (std::size_t i = 0; i < viewport_count; i++) {
auto& viewport = current_state.viewports[i];
const auto& src = regs.viewports[i];
const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
viewport.x = viewport_rect.left;
viewport.y = viewport_rect.bottom;
viewport.width = viewport_rect.GetWidth();

View File

@@ -62,8 +62,8 @@ public:
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect) override;
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
bool AccelerateDrawBatch(bool is_indexed) override;

View File

@@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <optional>
#include <glad/glad.h>
#include "common/alignment.h"
@@ -400,7 +399,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
return format;
}
Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
if (IsPixelFormatASTC(pixel_format)) {
// ASTC formats must stop at the ASTC block size boundary
@@ -424,7 +423,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
for (u32 i = 0; i < params.depth; i++) {
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
params.MipBlockDepth(mip_level), params.tile_width_spacing, 1,
gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
offset += layer_size;
offset_gl += gl_size;
@@ -550,8 +549,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
// alternatives. This signals a bug on those functions.
const auto width = static_cast<GLsizei>(params.MipWidth(0));
const auto height = static_cast<GLsizei>(params.MipHeight(0));
memory_size = params.MemorySize();
reinterpreted = false;
const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
gl_internal_format = format_tuple.internal_format;
@@ -965,31 +962,30 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
const auto& regs{gpu.regs};
if (!gpu.dirty_flags.color_buffer[index]) {
return current_color_buffers[index];
if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) {
return last_color_buffers[index];
}
gpu.dirty_flags.color_buffer.reset(index);
gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index));
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
if (index >= regs.rt_control.count) {
return current_color_buffers[index] = {};
return last_color_buffers[index] = {};
}
if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
return current_color_buffers[index] = {};
return last_color_buffers[index] = {};
}
const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
return last_color_buffers[index] = GetSurface(color_params, preserve_contents);
}
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
surface->LoadGLBuffer();
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->MarkAsModified(false, *this);
surface->MarkForReload(false);
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -1001,23 +997,18 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
Surface surface{TryGet(params.addr)};
if (surface) {
if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
// Use the cached surface as-is unless it's not synced with memory
if (surface->MustReload())
LoadSurface(surface);
// Use the cached surface as-is
return surface;
} else if (preserve_contents) {
// If surface parameters changed and we care about keeping the previous data, recreate
// the surface from the old one
Surface new_surface{RecreateSurface(surface, params)};
UnregisterSurface(surface);
Unregister(surface);
Register(new_surface);
if (new_surface->IsUploaded()) {
RegisterReinterpretSurface(new_surface);
}
return new_surface;
} else {
// Delete the old surface before creating a new one to prevent collisions.
UnregisterSurface(surface);
Unregister(surface);
}
}
@@ -1071,8 +1062,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
}
static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
std::size_t cubemap_face = 0) {
@@ -1202,7 +1193,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
void RasterizerCacheOpenGL::FermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) {
const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
@@ -1266,11 +1257,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
if (old_params.pixel_format == new_params.pixel_format)
FastLayeredCopySurface(old_surface, new_surface);
else {
AccurateCopySurface(old_surface, new_surface);
}
FastLayeredCopySurface(old_surface, new_surface);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
@@ -1299,107 +1286,4 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
return {};
}
static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
u32 height) {
for (u32 i = 0; i < params.max_mip_level; i++) {
if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
return {i};
}
}
return {};
}
static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
const std::size_t size = params.LayerMemorySize();
VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
for (u32 i = 0; i < params.depth; i++) {
if (start == addr) {
return {i};
}
start += size;
}
return {};
}
static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
const Surface blitted_surface) {
const auto& dst_params = blitted_surface->GetSurfaceParams();
const auto& src_params = render_surface->GetSurfaceParams();
const std::size_t src_memory_size = src_params.size_in_bytes;
const std::optional<u32> level =
TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
if (level.has_value()) {
if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
src_params.height == dst_params.MipHeight(*level) &&
src_params.block_height >= dst_params.MipBlockHeight(*level)) {
const std::optional<u32> slot =
TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
if (slot.has_value()) {
glCopyImageSubData(render_surface->Texture().handle,
SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
blitted_surface->Texture().handle,
SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
blitted_surface->MarkAsModified(true, cache);
return true;
}
}
}
return false;
}
static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
if (bound2 > bound1)
return true;
const auto& dst_params = blitted_surface->GetSurfaceParams();
const auto& src_params = render_surface->GetSurfaceParams();
return (dst_params.component_type != src_params.component_type);
}
static bool IsReinterpretInvalidSecond(const Surface render_surface,
const Surface blitted_surface) {
const auto& dst_params = blitted_surface->GetSurfaceParams();
const auto& src_params = render_surface->GetSurfaceParams();
return (dst_params.height > src_params.height && dst_params.width > src_params.width);
}
bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
Surface intersect) {
if (IsReinterpretInvalid(triggering_surface, intersect)) {
UnregisterSurface(intersect);
return false;
}
if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
UnregisterSurface(intersect);
return false;
}
FlushObject(intersect);
FlushObject(triggering_surface);
intersect->MarkForReload(true);
}
return true;
}
void RasterizerCacheOpenGL::SignalPreDrawCall() {
if (texception && GLAD_GL_ARB_texture_barrier) {
glTextureBarrier();
}
texception = false;
}
void RasterizerCacheOpenGL::SignalPostDrawCall() {
for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
if (current_color_buffers[i] != nullptr) {
Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
if (intersect != nullptr) {
PartialReinterpretSurface(current_color_buffers[i], intersect);
texception = true;
}
}
}
}
} // namespace OpenGL

View File

@@ -28,13 +28,12 @@ namespace OpenGL {
class CachedSurface;
using Surface = std::shared_ptr<CachedSurface>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
using SurfaceType = VideoCore::Surface::SurfaceType;
using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct SurfaceParams {
enum class SurfaceClass {
@@ -72,7 +71,7 @@ struct SurfaceParams {
}
/// Returns the rectangle corresponding to this surface
Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const;
/// Returns the total size of this surface in bytes, adjusted for compression
std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -141,18 +140,10 @@ struct SurfaceParams {
return offset;
}
std::size_t GetMipmapSingleSize(u32 mip_level) const {
return InnerMipmapMemorySize(mip_level, false, is_layered);
}
u32 MipWidth(u32 mip_level) const {
return std::max(1U, width >> mip_level);
}
u32 MipWidthGobAligned(u32 mip_level) const {
return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
}
u32 MipHeight(u32 mip_level) const {
return std::max(1U, height >> mip_level);
}
@@ -355,10 +346,6 @@ public:
return cached_size_in_bytes;
}
std::size_t GetMemorySize() const {
return memory_size;
}
void Flush() override {
FlushGLBuffer();
}
@@ -408,26 +395,6 @@ public:
Tegra::Texture::SwizzleSource swizzle_z,
Tegra::Texture::SwizzleSource swizzle_w);
void MarkReinterpreted() {
reinterpreted = true;
}
bool IsReinterpreted() const {
return reinterpreted;
}
void MarkForReload(bool reload) {
must_reload = reload;
}
bool MustReload() const {
return must_reload;
}
bool IsUploaded() const {
return params.identity == SurfaceParams::SurfaceClass::Uploaded;
}
private:
void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
@@ -441,9 +408,6 @@ private:
GLenum gl_internal_format{};
std::size_t cached_size_in_bytes{};
std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
std::size_t memory_size;
bool reinterpreted = false;
bool must_reload = false;
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -466,11 +430,8 @@ public:
/// Copies the contents of one surface to another
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect);
void SignalPreDrawCall();
void SignalPostDrawCall();
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect);
private:
void LoadSurface(const Surface& surface);
@@ -488,10 +449,6 @@ private:
/// Tries to get a reserved surface for the specified parameters
Surface TryGetReservedSurface(const SurfaceParams& params);
// Partially reinterpret a surface based on a triggering_surface that collides with it.
// Returns true if the reinterpretation was successful, false otherwise.
bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
/// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
@@ -508,50 +465,12 @@ private:
OGLFramebuffer read_framebuffer;
OGLFramebuffer draw_framebuffer;
bool texception = false;
/// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
/// using the new format.
OGLBuffer copy_pbo;
std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
Surface last_depth_buffer;
using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
static auto GetReinterpretInterval(const Surface& object) {
return SurfaceInterval::right_open(object->GetAddr() + 1,
object->GetAddr() + object->GetMemorySize() - 1);
}
// Reinterpreted surfaces are very fragile, as the game may keep rendering into them.
SurfaceIntervalCache reinterpreted_surfaces;
void RegisterReinterpretSurface(Surface reinterpret_surface) {
auto interval = GetReinterpretInterval(reinterpret_surface);
reinterpreted_surfaces.insert({interval, reinterpret_surface});
reinterpret_surface->MarkReinterpreted();
}
Surface CollideOnReinterpretedSurface(VAddr addr) const {
const SurfaceInterval interval{addr};
for (auto& pair :
boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
return pair.second;
}
return nullptr;
}
/// Unregisters an object from the cache
void UnregisterSurface(const Surface& object) {
if (object->IsReinterpreted()) {
auto interval = GetReinterpretInterval(object);
reinterpreted_surfaces.erase(interval);
}
Unregister(object);
}
};
} // namespace OpenGL

View File

@@ -20,7 +20,6 @@
namespace OpenGL::GLShader {
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::Header;
using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
@@ -289,22 +288,34 @@ private:
code.AddNewLine();
}
std::string GetInputFlags(AttributeUse attribute) {
std::string GetInputFlags(const IpaMode& input_mode) {
const IpaSampleMode sample_mode = input_mode.sampling_mode;
const IpaInterpMode interp_mode = input_mode.interpolation_mode;
std::string out;
switch (attribute) {
case AttributeUse::Constant:
switch (interp_mode) {
case IpaInterpMode::Flat:
out += "flat ";
break;
case AttributeUse::ScreenLinear:
case IpaInterpMode::Linear:
out += "noperspective ";
break;
case AttributeUse::Perspective:
case IpaInterpMode::Perspective:
// Default, Smooth
break;
default:
LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
UNREACHABLE();
UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
}
switch (sample_mode) {
case IpaSampleMode::Centroid:
// It can be implemented with the "centroid " keyword in GLSL
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
break;
case IpaSampleMode::Default:
// Default, n/a
break;
default:
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
}
return out;
}
@@ -313,11 +324,16 @@ private:
const auto& attributes = ir.GetInputAttributes();
for (const auto element : attributes) {
const Attribute::Index index = element.first;
const IpaMode& input_mode = *element.second.begin();
if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
// Skip when it's not a generic attribute
continue;
}
ASSERT(element.second.size() > 0);
UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
"Multiple input flag modes are not supported in GLSL");
// TODO(bunnei): Use proper number of elements for these
u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
if (stage != ShaderStage::Vertex) {
@@ -329,14 +345,8 @@ private:
if (stage == ShaderStage::Geometry) {
attr = "gs_" + attr + "[]";
}
std::string suffix;
if (stage == ShaderStage::Fragment) {
const auto input_mode =
header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
suffix = GetInputFlags(input_mode);
}
code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
attr + ';');
code.AddLine("layout (location = " + std::to_string(idx) + ") " +
GetInputFlags(input_mode) + "in vec4 " + attr + ';');
}
if (!attributes.empty())
code.AddNewLine();
@@ -1574,4 +1584,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
return {decompiler.GetResult(), decompiler.GetShaderEntries()};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader
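GetInputFlags above turns the per-attribute interpolation mode into a GLSL qualifier: flat for constant attributes, noperspective for screen-linear ones, and nothing for the default perspective-correct case. A small sketch of that mapping and of how it lands in the generated "layout (location = N)" declaration; the enum and attribute naming here are simplified stand-ins.

#include <cstdio>
#include <string>

enum class Interpolation { Flat, NoPerspective, Smooth };

// Returns the GLSL qualifier that matches the requested interpolation mode.
std::string GetInputFlags(Interpolation mode) {
    switch (mode) {
    case Interpolation::Flat:
        return "flat ";
    case Interpolation::NoPerspective:
        return "noperspective ";
    case Interpolation::Smooth:
    default:
        return ""; // perspective-correct interpolation is GLSL's default
    }
}

// Builds one generic input declaration the way the decompiler assembles them.
std::string MakeInputDeclaration(unsigned location, Interpolation mode) {
    return "layout (location = " + std::to_string(location) + ") " + GetInputFlags(mode) +
           "in vec4 input_attr_" + std::to_string(location) + ";";
}

int main() {
    std::printf("%s\n", MakeInputDeclaration(0, Interpolation::Flat).c_str());
    std::printf("%s\n", MakeInputDeclaration(1, Interpolation::Smooth).c_str());
    return 0;
}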

View File

@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstring>
#include <fmt/format.h>
#include <lz4.h>

View File

@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
layout (location = 0) in noperspective vec4 position;
layout (location = 0) in vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
return {out, program.second};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader

View File

@@ -11,9 +11,7 @@
namespace OpenGL {
OpenGLState OpenGLState::cur_state;
bool OpenGLState::s_rgb_used;
OpenGLState::OpenGLState() {
// These all match default OpenGL values
geometry_shaders.enabled = false;
@@ -114,6 +112,7 @@ void OpenGLState::ApplyDefaultState() {
}
void OpenGLState::ApplySRgb() const {
// sRGB
if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
if (framebuffer_srgb.enabled) {
// Track if sRGB is used
@@ -126,20 +125,23 @@ void OpenGLState::ApplySRgb() const {
}
void OpenGLState::ApplyCulling() const {
if (cull.enabled != cur_state.cull.enabled) {
// Culling
const bool cull_changed = cull.enabled != cur_state.cull.enabled;
if (cull_changed) {
if (cull.enabled) {
glEnable(GL_CULL_FACE);
} else {
glDisable(GL_CULL_FACE);
}
}
if (cull.enabled) {
if (cull_changed || cull.mode != cur_state.cull.mode) {
glCullFace(cull.mode);
}
if (cull.mode != cur_state.cull.mode) {
glCullFace(cull.mode);
}
if (cull.front_face != cur_state.cull.front_face) {
glFrontFace(cull.front_face);
if (cull_changed || cull.front_face != cur_state.cull.front_face) {
glFrontFace(cull.front_face);
}
}
}
@@ -170,63 +172,72 @@ void OpenGLState::ApplyColorMask() const {
}
void OpenGLState::ApplyDepth() const {
if (depth.test_enabled != cur_state.depth.test_enabled) {
// Depth test
const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled;
if (depth_test_changed) {
if (depth.test_enabled) {
glEnable(GL_DEPTH_TEST);
} else {
glDisable(GL_DEPTH_TEST);
}
}
if (depth.test_func != cur_state.depth.test_func) {
if (depth.test_enabled &&
(depth_test_changed || depth.test_func != cur_state.depth.test_func)) {
glDepthFunc(depth.test_func);
}
// Depth mask
if (depth.write_mask != cur_state.depth.write_mask) {
glDepthMask(depth.write_mask);
}
}
void OpenGLState::ApplyPrimitiveRestart() const {
if (primitive_restart.enabled != cur_state.primitive_restart.enabled) {
const bool primitive_restart_changed =
primitive_restart.enabled != cur_state.primitive_restart.enabled;
if (primitive_restart_changed) {
if (primitive_restart.enabled) {
glEnable(GL_PRIMITIVE_RESTART);
} else {
glDisable(GL_PRIMITIVE_RESTART);
}
}
if (primitive_restart.index != cur_state.primitive_restart.index) {
if (primitive_restart_changed ||
(primitive_restart.enabled &&
primitive_restart.index != cur_state.primitive_restart.index)) {
glPrimitiveRestartIndex(primitive_restart.index);
}
}
void OpenGLState::ApplyStencilTest() const {
if (stencil.test_enabled != cur_state.stencil.test_enabled) {
const bool stencil_test_changed = stencil.test_enabled != cur_state.stencil.test_enabled;
if (stencil_test_changed) {
if (stencil.test_enabled) {
glEnable(GL_STENCIL_TEST);
} else {
glDisable(GL_STENCIL_TEST);
}
}
const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) {
if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
config.test_mask != prev_config.test_mask) {
glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
}
if (config.action_depth_fail != prev_config.action_depth_fail ||
config.action_depth_pass != prev_config.action_depth_pass ||
config.action_stencil_fail != prev_config.action_stencil_fail) {
glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
config.action_depth_pass);
}
if (config.write_mask != prev_config.write_mask) {
glStencilMaskSeparate(face, config.write_mask);
}
};
ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
if (stencil.test_enabled) {
auto config_stencil = [stencil_test_changed](GLenum face, const auto& config,
const auto& prev_config) {
if (stencil_test_changed || config.test_func != prev_config.test_func ||
config.test_ref != prev_config.test_ref ||
config.test_mask != prev_config.test_mask) {
glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
}
if (stencil_test_changed || config.action_depth_fail != prev_config.action_depth_fail ||
config.action_depth_pass != prev_config.action_depth_pass ||
config.action_stencil_fail != prev_config.action_stencil_fail) {
glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
config.action_depth_pass);
}
if (config.write_mask != prev_config.write_mask) {
glStencilMaskSeparate(face, config.write_mask);
}
};
config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front);
config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
}
}
// Viewport does not affect glClearBuffer so emulate viewport using scissor test
void OpenGLState::EmulateViewportWithScissor() {
@@ -267,18 +278,19 @@ void OpenGLState::ApplyViewport() const {
updated.depth_range_far != current.depth_range_far) {
glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
}
if (updated.scissor.enabled != current.scissor.enabled) {
const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled;
if (scissor_changed) {
if (updated.scissor.enabled) {
glEnablei(GL_SCISSOR_TEST, i);
} else {
glDisablei(GL_SCISSOR_TEST, i);
}
}
if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
updated.scissor.width != current.scissor.width ||
updated.scissor.height != current.scissor.height) {
if (updated.scissor.enabled &&
(scissor_changed || updated.scissor.x != current.scissor.x ||
updated.scissor.y != current.scissor.y ||
updated.scissor.width != current.scissor.width ||
updated.scissor.height != current.scissor.height)) {
glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
updated.scissor.height);
}
@@ -290,23 +302,22 @@ void OpenGLState::ApplyViewport() const {
updated.height != current.height) {
glViewport(updated.x, updated.y, updated.width, updated.height);
}
if (updated.depth_range_near != current.depth_range_near ||
updated.depth_range_far != current.depth_range_far) {
glDepthRange(updated.depth_range_near, updated.depth_range_far);
}
if (updated.scissor.enabled != current.scissor.enabled) {
const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled;
if (scissor_changed) {
if (updated.scissor.enabled) {
glEnable(GL_SCISSOR_TEST);
} else {
glDisable(GL_SCISSOR_TEST);
}
}
if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
updated.scissor.width != current.scissor.width ||
updated.scissor.height != current.scissor.height) {
if (updated.scissor.enabled && (scissor_changed || updated.scissor.x != current.scissor.x ||
updated.scissor.y != current.scissor.y ||
updated.scissor.width != current.scissor.width ||
updated.scissor.height != current.scissor.height)) {
glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width,
updated.scissor.height);
}
@@ -316,7 +327,8 @@ void OpenGLState::ApplyViewport() const {
void OpenGLState::ApplyGlobalBlending() const {
const Blend& current = cur_state.blend[0];
const Blend& updated = blend[0];
if (updated.enabled != current.enabled) {
const bool blend_changed = updated.enabled != current.enabled;
if (blend_changed) {
if (updated.enabled) {
glEnable(GL_BLEND);
} else {
@@ -326,14 +338,15 @@ void OpenGLState::ApplyGlobalBlending() const {
if (!updated.enabled) {
return;
}
if (updated.src_rgb_func != current.src_rgb_func ||
if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
updated.dst_a_func != current.dst_a_func) {
glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
updated.dst_a_func);
}
if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
if (blend_changed || updated.rgb_equation != current.rgb_equation ||
updated.a_equation != current.a_equation) {
glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
}
}
@@ -341,22 +354,26 @@ void OpenGLState::ApplyGlobalBlending() const {
void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
const Blend& updated = blend[target];
const Blend& current = cur_state.blend[target];
if (updated.enabled != current.enabled || force) {
const bool blend_changed = updated.enabled != current.enabled || force;
if (blend_changed) {
if (updated.enabled) {
glEnablei(GL_BLEND, static_cast<GLuint>(target));
} else {
glDisablei(GL_BLEND, static_cast<GLuint>(target));
}
}
if (updated.src_rgb_func != current.src_rgb_func ||
if (!updated.enabled) {
return;
}
if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
updated.dst_a_func != current.dst_a_func) {
glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
}
if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
if (blend_changed || updated.rgb_equation != current.rgb_equation ||
updated.a_equation != current.a_equation) {
glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
updated.a_equation);
}
@@ -380,7 +397,8 @@ void OpenGLState::ApplyBlending() const {
}
void OpenGLState::ApplyLogicOp() const {
if (logic_op.enabled != cur_state.logic_op.enabled) {
const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled;
if (logic_op_changed) {
if (logic_op.enabled) {
glEnable(GL_COLOR_LOGIC_OP);
} else {
@@ -388,12 +406,14 @@ void OpenGLState::ApplyLogicOp() const {
}
}
if (logic_op.operation != cur_state.logic_op.operation) {
if (logic_op.enabled &&
(logic_op_changed || logic_op.operation != cur_state.logic_op.operation)) {
glLogicOp(logic_op.operation);
}
}
void OpenGLState::ApplyPolygonOffset() const {
const bool fill_enable_changed =
polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
const bool line_enable_changed =
@@ -428,7 +448,9 @@ void OpenGLState::ApplyPolygonOffset() const {
}
}
if (factor_changed || units_changed || clamp_changed) {
if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) &&
(factor_changed || units_changed || clamp_changed)) {
if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
} else {
@@ -461,7 +483,7 @@ void OpenGLState::ApplyTextures() const {
if (has_delta) {
glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
textures.data() + first);
textures.data());
}
}
@@ -482,7 +504,7 @@ void OpenGLState::ApplySamplers() const {
}
if (has_delta) {
glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
samplers.data() + first);
samplers.data());
}
}
@@ -506,9 +528,9 @@ void OpenGLState::ApplyDepthClamp() const {
depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
return;
}
UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
"Unimplemented Depth Clamp Separation!");
if (depth_clamp.far_plane != depth_clamp.near_plane) {
UNIMPLEMENTED_MSG("Unimplemented Depth Clamp Separation!");
}
if (depth_clamp.far_plane || depth_clamp.near_plane) {
glEnable(GL_DEPTH_CLAMP);
} else {
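
Taken together, the gl_state.cpp hunks above apply one pattern: remember whether the enable bit itself changed, and if it did, re-send the dependent parameters even when they compare equal to the cached copy, since the cached value may no longer match what the driver would use after a disable/enable cycle. A minimal standalone sketch of that pattern, with hypothetical names and logging in place of real GL calls:

#include <iostream>

struct CullState {
    bool enabled = false;
    int mode = 0; // e.g. GL_BACK
};

// Cached copy of what we believe the driver currently has.
CullState cached;

void ApplyCulling(const CullState& wanted) {
    const bool enable_changed = wanted.enabled != cached.enabled;
    if (enable_changed) {
        std::cout << (wanted.enabled ? "glEnable" : "glDisable") << "(GL_CULL_FACE)\n";
    }
    // Re-send the mode whenever the enable toggled, not only when the mode differs.
    if (wanted.enabled && (enable_changed || wanted.mode != cached.mode)) {
        std::cout << "glCullFace(" << wanted.mode << ")\n";
    }
    cached = wanted;
}

int main() {
    ApplyCulling({true, 1});  // glEnable + glCullFace
    ApplyCulling({true, 1});  // nothing, state unchanged
    ApplyCulling({false, 1}); // glDisable only
    ApplyCulling({true, 1});  // glEnable + glCullFace again
}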

View File

@@ -257,7 +257,6 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer) {
texture.width = framebuffer.width;
texture.height = framebuffer.height;
texture.pixel_format = framebuffer.pixel_format;
GLint internal_format;
switch (framebuffer.pixel_format) {
@@ -381,8 +380,7 @@ void RendererOpenGL::CaptureScreenshot() {
GLuint renderbuffer;
glGenRenderbuffers(1, &renderbuffer);
glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
glRenderbufferStorage(GL_RENDERBUFFER, state.GetsRGBUsed() ? GL_SRGB8 : GL_RGB8, layout.width,
layout.height);
glRenderbufferStorage(GL_RENDERBUFFER, GL_RGB8, layout.width, layout.height);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
DrawScreen(layout);

View File

@@ -39,7 +39,7 @@ struct TextureInfo {
/// Structure used for storing information about the display target for the Switch screen
struct ScreenInfo {
GLuint display_texture;
const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
TextureInfo texture;
};
@@ -102,7 +102,7 @@ private:
/// Used for transforming the framebuffer orientation
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
Common::Rectangle<int> framebuffer_crop_rect;
MathUtil::Rectangle<int> framebuffer_crop_rect;
};
} // namespace OpenGL

View File

@@ -1,116 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <memory>
#include <optional>
#include <tuple>
#include "common/alignment.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
: RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} {
const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eIndexBuffer |
vk::BufferUsageFlagBits::eUniformBuffer;
const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
vk::AccessFlagBits::eUniformRead;
stream_buffer =
std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
vk::PipelineStageFlagBits::eAllCommands);
buffer_handle = stream_buffer->GetBuffer();
}
VKBufferCache::~VKBufferCache() = default;
u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
bool cache) {
const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
ASSERT(cpu_addr);
// Cache management is a big overhead, so only cache entries above a certain size.
// TODO: Figure out which size threshold works best for a given game.
cache &= size >= 2048;
if (cache) {
if (auto entry = TryGet(*cpu_addr); entry) {
if (entry->size >= size && entry->alignment == alignment) {
return entry->offset;
}
Unregister(entry);
}
}
AlignBuffer(alignment);
const u64 uploaded_offset = buffer_offset;
Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
buffer_ptr += size;
buffer_offset += size;
if (cache) {
auto entry = std::make_shared<CachedBufferEntry>();
entry->offset = uploaded_offset;
entry->size = size;
entry->alignment = alignment;
entry->addr = *cpu_addr;
Register(entry);
}
return uploaded_offset;
}
u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
AlignBuffer(alignment);
std::memcpy(buffer_ptr, raw_pointer, size);
const u64 uploaded_offset = buffer_offset;
buffer_ptr += size;
buffer_offset += size;
return uploaded_offset;
}
std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
AlignBuffer(alignment);
u8* const uploaded_ptr = buffer_ptr;
const u64 uploaded_offset = buffer_offset;
buffer_ptr += size;
buffer_offset += size;
return {uploaded_ptr, uploaded_offset};
}
void VKBufferCache::Reserve(std::size_t max_size) {
bool invalidate;
std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
buffer_offset = buffer_offset_base;
if (invalidate) {
InvalidateAll();
}
}
VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) {
return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base);
}
void VKBufferCache::AlignBuffer(std::size_t alignment) {
// Align the offset, not the mapped pointer
const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
buffer_ptr += offset_aligned - buffer_offset;
buffer_offset = offset_aligned;
}
} // namespace Vulkan
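
A rough usage sketch of the VKBufferCache interface shown above (not taken from yuzu; buffer_cache, exctx, gpu_addr, vertex_size, uniform_data and max_expected_size are assumed to exist in the surrounding renderer code, uniform_data being e.g. a std::vector<u8>):

// Map a region of the stream buffer large enough for this draw.
buffer_cache.Reserve(max_expected_size);

// Upload guest vertex data and host-side uniform data; both return offsets into GetBuffer().
const u64 vertex_offset = buffer_cache.UploadMemory(gpu_addr, vertex_size, 4);
const u64 uniform_offset =
    buffer_cache.UploadHostMemory(uniform_data.data(), uniform_data.size(), 256);

// Fence-protect the written range before the draw is submitted.
exctx = buffer_cache.Send(exctx);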

View File

@@ -1,87 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <tuple>
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Tegra {
class MemoryManager;
}
namespace Vulkan {
class VKDevice;
class VKFence;
class VKMemoryManager;
class VKStreamBuffer;
struct CachedBufferEntry final : public RasterizerCacheObject {
VAddr GetAddr() const override {
return addr;
}
std::size_t GetSizeInBytes() const override {
return size;
}
// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}
VAddr addr;
std::size_t size;
u64 offset;
std::size_t alignment;
};
class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
public:
explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
~VKBufferCache();
/// Uploads data from a guest GPU address. Returns the offset within the host buffer where it has
/// been allocated.
u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
bool cache = true);
/// Uploads data from host memory. Returns the offset within the host buffer where it has been allocated.
u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
/// Reserves memory to be used by the host CPU. Returns the mapped address and offset.
std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
/// Reserves a region of memory to be used in subsequent upload/reserve operations.
void Reserve(std::size_t max_size);
/// Ensures that the set data is sent to the device.
[[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx);
/// Returns the buffer cache handle.
vk::Buffer GetBuffer() const {
return buffer_handle;
}
private:
void AlignBuffer(std::size_t alignment);
Tegra::MemoryManager& tegra_memory_manager;
std::unique_ptr<VKStreamBuffer> stream_buffer;
vk::Buffer buffer_handle;
u8* buffer_ptr = nullptr;
u64 buffer_offset = 0;
u64 buffer_offset_base = 0;
};
} // namespace Vulkan

View File

@@ -1,252 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <optional>
#include <tuple>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
namespace Vulkan {
// TODO(Rodrigo): Fine tune this number
constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
class VKMemoryAllocation final {
public:
explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
: device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
shifted_type{ShiftType(type)}, is_mappable{properties &
vk::MemoryPropertyFlagBits::eHostVisible} {
if (is_mappable) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
}
}
~VKMemoryAllocation() {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
if (is_mappable)
dev.unmapMemory(memory, dld);
dev.free(memory, nullptr, dld);
}
VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
if (!found) {
found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
if (!found) {
// Signal out of memory; the caller will try to make more allocations.
return nullptr;
}
}
u8* address = is_mappable ? base_address + *found : nullptr;
auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
*found + commit_size);
commits.push_back(commit.get());
// The region right after the last commit is likely to be free.
free_iterator = *found + commit_size;
return commit;
}
void Free(const VKMemoryCommitImpl* commit) {
ASSERT(commit);
const auto it =
std::find_if(commits.begin(), commits.end(),
[&](const auto& stored_commit) { return stored_commit == commit; });
if (it == commits.end()) {
LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
UNREACHABLE();
return;
}
commits.erase(it);
}
/// Returns whether this allocation is compatible with the arguments.
bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const {
return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) &&
(type_mask & shifted_type) != 0;
}
private:
static constexpr u32 ShiftType(u32 type) {
return 1U << type;
}
/// Searches for a free region between "start" and "end" that satisfies the requested size and
/// alignment; returns an empty optional when none is found.
std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
u64 iterator = start;
while (iterator + size < end) {
const u64 try_left = Common::AlignUp(iterator, alignment);
const u64 try_right = try_left + size;
bool overlap = false;
for (const auto& commit : commits) {
const auto [commit_left, commit_right] = commit->interval;
if (try_left < commit_right && commit_left < try_right) {
// There's an overlap, continue the search where the overlapping commit ends.
iterator = commit_right;
overlap = true;
break;
}
}
if (!overlap) {
// A free address has been found.
return try_left;
}
}
// No free regions were found, return an empty optional.
return std::nullopt;
}
const VKDevice& device; ///< Vulkan device.
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
const u64 alloc_size; ///< Size of this allocation.
const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
const bool is_mappable; ///< Whether the allocation is mappable.
/// Base address of the mapped pointer.
u8* base_address{};
/// Hints where the next free region is likely going to be.
u64 free_iterator{};
/// Stores all commits done from this allocation.
std::vector<const VKMemoryCommitImpl*> commits;
};
VKMemoryManager::VKMemoryManager(const VKDevice& device)
: device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
is_memory_unified{GetMemoryUnified(props)} {}
VKMemoryManager::~VKMemoryManager() = default;
VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
// When a host visible commit is requested, search for host visible and coherent memory;
// otherwise search for a fast device local type.
const vk::MemoryPropertyFlags wanted_properties =
host_visible
? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
: vk::MemoryPropertyFlagBits::eDeviceLocal;
const auto TryCommit = [&]() -> VKMemoryCommit {
for (auto& alloc : allocs) {
if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
continue;
if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
return commit;
}
}
return {};
};
if (auto commit = TryCommit(); commit) {
return commit;
}
// Commit has failed, allocate more memory.
if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
// TODO(Rodrigo): Try to use host memory.
LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
UNREACHABLE();
}
// Commit again, this time it won't fail since there's a fresh allocation above. If it does,
// there's a bug.
auto commit = TryCommit();
ASSERT(commit);
return commit;
}
VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
auto commit = Commit(requeriments, host_visible);
dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const auto requeriments = dev.getImageMemoryRequirements(image, dld);
auto commit = Commit(requeriments, host_visible);
dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
u64 size) {
const u32 type = [&]() {
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
const auto flags = props.memoryTypes[type_index].propertyFlags;
if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
// The memory type is allowed by the mask and has the wanted properties.
return type_index;
}
}
LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
UNREACHABLE();
return 0u;
}();
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
// Try to allocate found type.
const vk::MemoryAllocateInfo memory_ai(size, type);
vk::DeviceMemory memory;
if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
res != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
return false;
}
allocs.push_back(
std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
return true;
}
/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
// Memory is considered unified when heaps are device local only.
return false;
}
}
return true;
}
VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
u8* data, u64 begin, u64 end)
: interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
allocation->Free(this);
}
u8* VKMemoryCommitImpl::GetData() const {
ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
return data;
}
} // namespace Vulkan
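
TryFindFreeSection above is a linear first-fit search over the commits already carved out of the chunk. A standalone sketch of the same idea, with the occupied intervals stored as plain pairs:

#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

using u64 = std::uint64_t;

constexpr u64 AlignUp(u64 value, u64 alignment) {
    return (value + alignment - 1) / alignment * alignment;
}

// First-fit: walk [start, end) and return the first aligned offset where `size`
// bytes do not overlap any existing [left, right) commit.
std::optional<u64> FindFreeSection(u64 start, u64 end, u64 size, u64 alignment,
                                   const std::vector<std::pair<u64, u64>>& commits) {
    u64 iterator = start;
    while (iterator + size < end) {
        const u64 left = AlignUp(iterator, alignment);
        const u64 right = left + size;
        bool overlap = false;
        for (const auto& [commit_left, commit_right] : commits) {
            if (left < commit_right && commit_left < right) {
                iterator = commit_right; // skip past the blocking commit
                overlap = true;
                break;
            }
        }
        if (!overlap) {
            return left;
        }
    }
    return std::nullopt;
}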

View File

@@ -1,87 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
class VKMemoryAllocation;
class VKMemoryCommitImpl;
using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
class VKMemoryManager final {
public:
explicit VKMemoryManager(const VKDevice& device);
~VKMemoryManager();
/**
* Commits memory with the specified requirements.
* @param reqs Requirements returned from a Vulkan call.
* @param host_visible Signals the allocator that it *must* use host visible and coherent
* memory. When passing false, it will try to allocate device local memory.
* @returns A memory commit.
*/
VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
/// Commits memory required by the buffer and binds it.
VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible);
/// Commits memory required by the image and binds it.
VKMemoryCommit Commit(vk::Image image, bool host_visible);
/// Returns true if memory allocations are always done in host visible and coherent memory.
bool IsMemoryUnified() const {
return is_memory_unified;
}
private:
/// Allocates a chunk of memory.
bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
/// Returns true if the device uses a unified memory model.
static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
const VKDevice& device; ///< Device handler.
const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
const bool is_memory_unified; ///< True if memory model is unified.
std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
};
class VKMemoryCommitImpl final {
friend VKMemoryAllocation;
public:
explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
u64 begin, u64 end);
~VKMemoryCommitImpl();
/// Returns the writeable memory map. The commit has to be mappable.
u8* GetData() const;
/// Returns the Vulkan memory handler.
vk::DeviceMemory GetMemory() const {
return memory;
}
/// Returns the start position of the commit relative to the allocation.
vk::DeviceSize GetOffset() const {
return static_cast<vk::DeviceSize>(interval.first);
}
private:
std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
vk::DeviceMemory memory; ///< Vulkan device memory handler.
VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
};
} // namespace Vulkan
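
A hedged usage sketch of the interface above; memory_manager, buffer and payload are assumed to exist in the surrounding code, and <cstring> is assumed to be included:

// Allocate and bind memory for the buffer; asking for host visible memory lets us map it.
VKMemoryCommit commit = memory_manager.Commit(buffer, /*host_visible=*/true);

// Host visible commits expose a mapped pointer; device local ones would need a staging copy.
u8* mapped = commit->GetData();
std::memcpy(mapped, payload.data(), payload.size());

// commit->GetMemory() and commit->GetOffset() give the underlying vk::DeviceMemory and offset.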

View File

@@ -1,285 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <optional>
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
namespace Vulkan {
// TODO(Rodrigo): Fine tune these numbers.
constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
constexpr std::size_t FENCES_GROW_STEP = 0x40;
class CommandBufferPool final : public VKFencedPool {
public:
CommandBufferPool(const VKDevice& device)
: VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
void Allocate(std::size_t begin, std::size_t end) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const u32 graphics_family = device.GetGraphicsFamily();
auto pool = std::make_unique<Pool>();
// Command buffers are going to be committed, recorded and executed every single usage cycle.
// They are also going to be reset when committed.
const auto pool_flags = vk::CommandPoolCreateFlagBits::eTransient |
vk::CommandPoolCreateFlagBits::eResetCommandBuffer;
const vk::CommandPoolCreateInfo cmdbuf_pool_ci(pool_flags, graphics_family);
pool->handle = dev.createCommandPoolUnique(cmdbuf_pool_ci, nullptr, dld);
const vk::CommandBufferAllocateInfo cmdbuf_ai(*pool->handle,
vk::CommandBufferLevel::ePrimary,
static_cast<u32>(COMMAND_BUFFER_POOL_SIZE));
pool->cmdbufs =
dev.allocateCommandBuffersUnique<std::allocator<UniqueCommandBuffer>>(cmdbuf_ai, dld);
pools.push_back(std::move(pool));
}
vk::CommandBuffer Commit(VKFence& fence) {
const std::size_t index = CommitResource(fence);
const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
return *pools[pool_index]->cmdbufs[sub_index];
}
private:
struct Pool {
UniqueCommandPool handle;
std::vector<UniqueCommandBuffer> cmdbufs;
};
const VKDevice& device;
std::vector<std::unique_ptr<Pool>> pools;
};
VKResource::VKResource() = default;
VKResource::~VKResource() = default;
VKFence::VKFence(const VKDevice& device, UniqueFence handle)
: device{device}, handle{std::move(handle)} {}
VKFence::~VKFence() = default;
void VKFence::Wait() {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
}
void VKFence::Release() {
is_owned = false;
}
void VKFence::Commit() {
is_owned = true;
is_used = true;
}
bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
if (!is_used) {
// If a fence is not used it's always free.
return true;
}
if (is_owned && !owner_wait) {
// The fence is still owned (Release has not been called) and an ownership wait has
// not been requested.
return false;
}
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
if (gpu_wait) {
// Wait for the fence if it has been requested.
dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
} else {
if (dev.getFenceStatus(*handle, dld) != vk::Result::eSuccess) {
// The Vulkan fence is not ready; there is not much we can do here
return false;
}
}
// Notify protected resources that they are now free.
for (auto* resource : protected_resources) {
resource->OnFenceRemoval(this);
}
protected_resources.clear();
// Prepare the fence for reuse.
dev.resetFences({*handle}, dld);
is_used = false;
return true;
}
void VKFence::Protect(VKResource* resource) {
protected_resources.push_back(resource);
}
void VKFence::Unprotect(VKResource* resource) {
const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
ASSERT(it != protected_resources.end());
resource->OnFenceRemoval(this);
protected_resources.erase(it);
}
VKFenceWatch::VKFenceWatch() = default;
VKFenceWatch::~VKFenceWatch() {
if (fence) {
fence->Unprotect(this);
}
}
void VKFenceWatch::Wait() {
if (fence == nullptr) {
return;
}
fence->Wait();
fence->Unprotect(this);
}
void VKFenceWatch::Watch(VKFence& new_fence) {
Wait();
fence = &new_fence;
fence->Protect(this);
}
bool VKFenceWatch::TryWatch(VKFence& new_fence) {
if (fence) {
return false;
}
fence = &new_fence;
fence->Protect(this);
return true;
}
void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) {
ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence");
fence = nullptr;
}
VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {}
VKFencedPool::~VKFencedPool() = default;
std::size_t VKFencedPool::CommitResource(VKFence& fence) {
const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
for (std::size_t iterator = begin; iterator < end; ++iterator) {
if (watches[iterator]->TryWatch(fence)) {
// The resource is now being watched, a free resource was successfully found.
return iterator;
}
}
return {};
};
// Try to find a free resource from the hinted position to the end.
auto found = Search(free_iterator, watches.size());
if (!found) {
// Search from beginning to the hinted position.
found = Search(0, free_iterator);
if (!found) {
// Both searches failed, the pool is full; handle it.
const std::size_t free_resource = ManageOverflow();
// Watch will wait for the resource to be free.
watches[free_resource]->Watch(fence);
found = free_resource;
}
}
// The free iterator is hinted to the resource after the one that has just been committed.
free_iterator = (*found + 1) % watches.size();
return *found;
}
std::size_t VKFencedPool::ManageOverflow() {
const std::size_t old_capacity = watches.size();
Grow();
// The last entry is guaranteed to be free, since it's the first element of the freshly
// allocated resources.
return old_capacity;
}
void VKFencedPool::Grow() {
const std::size_t old_capacity = watches.size();
watches.resize(old_capacity + grow_step);
std::generate(watches.begin() + old_capacity, watches.end(),
[]() { return std::make_unique<VKFenceWatch>(); });
Allocate(old_capacity, old_capacity + grow_step);
}
VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} {
GrowFences(FENCES_GROW_STEP);
command_buffer_pool = std::make_unique<CommandBufferPool>(device);
}
VKResourceManager::~VKResourceManager() = default;
VKFence& VKResourceManager::CommitFence() {
const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* {
const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); };
const auto hinted = fences.begin() + fences_iterator;
auto it = std::find_if(hinted, fences.end(), Tick);
if (it == fences.end()) {
it = std::find_if(fences.begin(), hinted, Tick);
if (it == hinted) {
return nullptr;
}
}
fences_iterator = std::distance(fences.begin(), it) + 1;
if (fences_iterator >= fences.size())
fences_iterator = 0;
auto& fence = *it;
fence->Commit();
return fence.get();
};
VKFence* found_fence = StepFences(false, false);
if (!found_fence) {
// Try again, this time waiting.
found_fence = StepFences(true, false);
if (!found_fence) {
// Allocate new fences and try again.
LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(),
fences.size() + FENCES_GROW_STEP);
GrowFences(FENCES_GROW_STEP);
found_fence = StepFences(true, false);
ASSERT(found_fence != nullptr);
}
}
return *found_fence;
}
vk::CommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) {
return command_buffer_pool->Commit(fence);
}
void VKResourceManager::GrowFences(std::size_t new_fences_count) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const vk::FenceCreateInfo fence_ci;
const std::size_t previous_size = fences.size();
fences.resize(previous_size + new_fences_count);
std::generate(fences.begin() + previous_size, fences.end(), [&]() {
return std::make_unique<VKFence>(device, dev.createFenceUnique(fence_ci, nullptr, dld));
});
}
} // namespace Vulkan

View File

@@ -1,180 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <memory>
#include <vector>
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
class VKFence;
class VKResourceManager;
class CommandBufferPool;
/// Interface for a Vulkan resource
class VKResource {
public:
explicit VKResource();
virtual ~VKResource();
/**
* Signals the object that an owning fence has been signaled.
* @param signaling_fence Fence that signals its usage end.
*/
virtual void OnFenceRemoval(VKFence* signaling_fence) = 0;
};
/**
* Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access.
* They must be committed from the resource manager. Their usage flow is: commit the fence from the
* resource manager, protect resources with it and use them, send the fence to an execution queue
* and Wait for it if needed and then call Release. Used resources will automatically be signaled
* when they are free to be reused.
* @brief Protects resources for concurrent usage and signals its release.
*/
class VKFence {
friend class VKResourceManager;
public:
explicit VKFence(const VKDevice& device, UniqueFence handle);
~VKFence();
/**
* Waits for the fence to be signaled.
* @warning You must have ownership of the fence and it has to be previously sent to a queue to
* call this function.
*/
void Wait();
/**
* Releases ownership of the fence. Call this after it has been sent to an execution queue.
* Unmanaged usage of the fence after the call will result in undefined behavior because it may
* already be in use for something else.
*/
void Release();
/// Protects a resource with this fence.
void Protect(VKResource* resource);
/// Removes protection for a resource.
void Unprotect(VKResource* resource);
/// Retrieves the fence.
operator vk::Fence() const {
return *handle;
}
private:
/// Take ownership of the fence.
void Commit();
/**
* Updates the fence status.
* @warning Waiting for the owner might soft lock the execution.
* @param gpu_wait Wait for the fence to be signaled by the driver.
* @param owner_wait Wait for the owner to signal its freedom.
* @returns True if the fence is free. Waiting for gpu and owner will always return true.
*/
bool Tick(bool gpu_wait, bool owner_wait);
const VKDevice& device; ///< Device handler
UniqueFence handle; ///< Vulkan fence
std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence
bool is_owned = false; ///< The fence has been committed but not released yet.
bool is_used = false; ///< The fence has been committed but it has not been checked to be free.
};
/**
* A fence watch is used to keep track of the usage of a fence and protect a resource or set of
* resources without having to inherit VKResource from their handlers.
*/
class VKFenceWatch final : public VKResource {
public:
explicit VKFenceWatch();
~VKFenceWatch();
/// Waits for the fence to be released.
void Wait();
/**
* Waits for a previous fence and watches a new one.
* @param new_fence New fence to watch.
*/
void Watch(VKFence& new_fence);
/**
* Checks if the watch is currently free and, if it is, starts watching the given fence.
* @returns True if the watch has started, false if the watch is already in use.
*/
bool TryWatch(VKFence& new_fence);
void OnFenceRemoval(VKFence* signaling_fence) override;
private:
VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
};
/**
* Handles a pool of resources protected by fences. Manages resource overflow allocating more
* resources.
*/
class VKFencedPool {
public:
explicit VKFencedPool(std::size_t grow_step);
virtual ~VKFencedPool();
protected:
/**
* Commits a free resource and protects it with a fence. It may allocate new resources.
* @param fence Fence that protects the committed resource.
* @returns Index of the committed resource.
*/
std::size_t CommitResource(VKFence& fence);
/// Called when a chunk of resources has to be allocated.
virtual void Allocate(std::size_t begin, std::size_t end) = 0;
private:
/// Manages pool overflow allocating new resources.
std::size_t ManageOverflow();
/// Allocates a new page of resources.
void Grow();
std::size_t grow_step = 0; ///< Number of new resources created after an overflow
std::size_t free_iterator = 0; ///< Hint to where the next free resource is likely to be found
std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources
};
/**
* The resource manager handles all resources that can be protected with a fence avoiding
* driver-side or GPU-side concurrent usage. Usage is documented in VKFence.
*/
class VKResourceManager final {
public:
explicit VKResourceManager(const VKDevice& device);
~VKResourceManager();
/// Commits a fence. It has to be sent to a queue and released.
VKFence& CommitFence();
/// Commits an unused command buffer and protects it with a fence.
vk::CommandBuffer CommitCommandBuffer(VKFence& fence);
private:
/// Allocates new fences.
void GrowFences(std::size_t new_fences_count);
const VKDevice& device; ///< Device handler.
std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found.
std::vector<std::unique_ptr<VKFence>> fences; ///< Pool of fences.
std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers.
};
} // namespace Vulkan
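
The usage flow documented on VKFence reads as a small protocol; one cycle might look like the following sketch (resource_manager and the actual submission code are assumed from the surrounding renderer):

VKFence& fence = resource_manager.CommitFence();            // take ownership of a free fence
VKFenceWatch watch;
watch.Watch(fence);                                          // protect a resource with it

vk::CommandBuffer cmdbuf = resource_manager.CommitCommandBuffer(fence);
// ... record work into cmdbuf and submit it with `fence` as the submit fence ...

fence.Release();                                             // give up ownership after submission
// When the fence later ticks as signaled, the watch is notified through OnFenceRemoval
// and the protected resource becomes reusable.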

View File

@@ -1,60 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
: device{device}, resource_manager{resource_manager} {
next_fence = &resource_manager.CommitFence();
AllocateNewContext();
}
VKScheduler::~VKScheduler() = default;
VKExecutionContext VKScheduler::GetExecutionContext() const {
return VKExecutionContext(current_fence, current_cmdbuf);
}
VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
SubmitExecution(semaphore);
current_fence->Release();
AllocateNewContext();
return GetExecutionContext();
}
VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) {
SubmitExecution(semaphore);
current_fence->Wait();
current_fence->Release();
AllocateNewContext();
return GetExecutionContext();
}
void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
const auto& dld = device.GetDispatchLoader();
current_cmdbuf.end(dld);
const auto queue = device.GetGraphicsQueue();
const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
&semaphore);
queue.submit({submit_info}, *current_fence, dld);
}
void VKScheduler::AllocateNewContext() {
current_fence = next_fence;
current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
next_fence = &resource_manager.CommitFence();
const auto& dld = device.GetDispatchLoader();
current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
}
} // namespace Vulkan

View File

@@ -1,69 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
class VKExecutionContext;
class VKFence;
class VKResourceManager;
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class VKScheduler {
public:
explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
~VKScheduler();
/// Gets the current execution context.
[[nodiscard]] VKExecutionContext GetExecutionContext() const;
/// Sends the current execution context to the GPU. It invalidates the current execution context
/// and returns a new one.
VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete. It invalidates
/// the current execution context and returns a new one.
VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);
private:
void SubmitExecution(vk::Semaphore semaphore);
void AllocateNewContext();
const VKDevice& device;
VKResourceManager& resource_manager;
vk::CommandBuffer current_cmdbuf;
VKFence* current_fence = nullptr;
VKFence* next_fence = nullptr;
};
class VKExecutionContext {
friend class VKScheduler;
public:
VKExecutionContext() = default;
VKFence& GetFence() const {
return *fence;
}
vk::CommandBuffer GetCommandBuffer() const {
return cmdbuf;
}
private:
explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
: fence{fence}, cmdbuf{cmdbuf} {}
VKFence* fence{};
vk::CommandBuffer cmdbuf;
};
} // namespace Vulkan
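
Putting the scheduler interface together, a caller records into the current execution context and calls Flush (or Finish when it must wait for completion) to obtain a fresh one. A hedged sketch, with `scheduler` assumed from the surrounding code:

VKExecutionContext exctx = scheduler.GetExecutionContext();
const vk::CommandBuffer cmdbuf = exctx.GetCommandBuffer();

// ... record copies, barriers or draws into cmdbuf ...

exctx = scheduler.Flush();   // submits the recorded work and returns a new context
// scheduler.Finish() does the same but also waits for the submitted fence to signal.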

View File

@@ -1,90 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <memory>
#include <optional>
#include <vector>
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
: device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
pipeline_stage} {
CreateBuffers(memory_manager, usage);
ReserveWatches(WATCHES_INITIAL_RESERVE);
}
VKStreamBuffer::~VKStreamBuffer() = default;
std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
ASSERT(size <= buffer_size);
mapped_size = size;
if (offset + size > buffer_size) {
// The buffer would overflow; save the number of used watches, signal an invalidation and
// reset the state.
invalidation_mark = used_watches;
used_watches = 0;
offset = 0;
}
return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
}
VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
if (invalidation_mark) {
// TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
exctx = scheduler.Flush();
std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
[&](auto& resource) { resource->Wait(); });
invalidation_mark = std::nullopt;
}
if (used_watches + 1 >= watches.size()) {
// Ensure that there are enough watches.
ReserveWatches(WATCHES_RESERVE_CHUNK);
}
// Add a watch for this allocation.
watches[used_watches++]->Watch(exctx.GetFence());
offset += size;
return exctx;
}
void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
commit = memory_manager.Commit(*buffer, true);
mapped_pointer = commit->GetData();
}
void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
const std::size_t previous_size = watches.size();
watches.resize(previous_size + grow_size);
std::generate(watches.begin() + previous_size, watches.end(),
[]() { return std::make_unique<VKFenceWatch>(); });
}
} // namespace Vulkan

View File

@@ -1,72 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <optional>
#include <tuple>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
namespace Vulkan {
class VKDevice;
class VKFence;
class VKFenceWatch;
class VKResourceManager;
class VKScheduler;
class VKStreamBuffer {
public:
explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
~VKStreamBuffer();
/**
* Reserves a region of memory from the stream buffer.
* @param size Size to reserve.
* @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
* offset and a boolean that is true when the buffer has been invalidated.
*/
std::tuple<u8*, u64, bool> Reserve(u64 size);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
[[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
vk::Buffer GetBuffer() const {
return *buffer;
}
private:
/// Creates Vulkan buffer handles, committing the required memory.
void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
/// Increases the number of watches available.
void ReserveWatches(std::size_t grow_size);
const VKDevice& device; ///< Vulkan device manager.
VKScheduler& scheduler; ///< Command scheduler.
const u64 buffer_size; ///< Total size of the stream buffer.
const vk::AccessFlags access; ///< Access usage of this stream buffer.
const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
UniqueBuffer buffer; ///< Mapped buffer.
VKMemoryCommit commit; ///< Memory commit.
u8* mapped_pointer{}; ///< Pointer to the host visible commit
u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.
std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
std::optional<std::size_t>
invalidation_mark{}; ///< Number of watches used in the current invalidation.
};
} // namespace Vulkan
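
Reserve and Send bracket every write into the stream buffer, and the boolean in the returned tuple tells the caller that previously obtained offsets are no longer valid. A sketch of one upload (stream_buffer, exctx, data and upload_size are assumed; <cstring> is assumed to be included):

const auto [ptr, offset, invalidated] = stream_buffer.Reserve(upload_size);
if (invalidated) {
    // The buffer wrapped around: drop any cached offsets into it.
}
std::memcpy(ptr, data, upload_size);              // write into the mapped region
exctx = stream_buffer.Send(exctx, upload_size);   // fence-protect the written range
// `offset` is where this upload starts inside stream_buffer.GetBuffer().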

View File

@@ -48,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
Tegra::Shader::IpaSampleMode::Default};
u64 next_element = instr.attribute.fmt20.element;

View File

@@ -135,18 +135,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
instr.ipa.sample_mode.Value()};
const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
Node value = attr;
const Tegra::Shader::Attribute::Index index = attribute.index.Value();
if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
index <= Tegra::Shader::Attribute::Index::Attribute_31) {
// TODO(Blinkhawk): There are cases where a perspective attribute uses PASS.
// In theory, by setting them as perspective, OpenGL does the perspective correction.
// A way must be figured out to reverse the last step of it.
if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
}
}
value = GetSaturatedFloat(value, instr.ipa.saturate);
const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);
SetRegister(bb, instr.gpr0, value);
break;
@@ -186,4 +175,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -20,9 +20,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
return {node, cursor};
}
if (const auto conditional = std::get_if<ConditionalNode>(node)) {
const auto& conditional_code = conditional->GetCode();
const auto [found, internal_cursor] = FindOperation(
conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
const auto& code = conditional->GetCode();
const auto [found, internal_cursor] =
FindOperation(code, static_cast<s64>(code.size() - 1), operation_code);
if (found)
return {found, cursor};
}
@@ -58,8 +58,8 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
return nullptr;
}
if (const auto conditional = std::get_if<ConditionalNode>(tracked)) {
const auto& conditional_code = conditional->GetCode();
return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
const auto& code = conditional->GetCode();
return TrackCbuf(tracked, code, static_cast<s64>(code.size()));
}
return nullptr;
}

Some files were not shown because too many files have changed in this diff.