Compare commits
46 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
35c105a108 | ||
|
|
49c6d21b31 | ||
|
|
1b13859af8 | ||
|
|
1f5d6a8fed | ||
|
|
66f4fd4c81 | ||
|
|
7ea097e5c2 | ||
|
|
5a9204dbd7 | ||
|
|
d6b9b51606 | ||
|
|
e64fa4d2ea | ||
|
|
3558c88442 | ||
|
|
e9d84ef22c | ||
|
|
5bc82d124c | ||
|
|
8932001610 | ||
|
|
44ea2810e4 | ||
|
|
d583fc1e97 | ||
|
|
45b6d2d349 | ||
|
|
f15e2dd881 | ||
|
|
ef84c70d22 | ||
|
|
532dda0499 | ||
|
|
1068c1b06f | ||
|
|
6335bf136f | ||
|
|
42f7c11021 | ||
|
|
14430f7df9 | ||
|
|
eb5a3dd1c7 | ||
|
|
be1a1584fc | ||
|
|
66e023fba2 | ||
|
|
b27e6ad912 | ||
|
|
16ea93c11e | ||
|
|
a6a783b3dc | ||
|
|
e7eff72e83 | ||
|
|
46b3209abb | ||
|
|
0e1b5acc6a | ||
|
|
b9238edd0d | ||
|
|
e56f32a071 | ||
|
|
1b855efd5e | ||
|
|
a1574aabd5 | ||
|
|
10d1d58390 | ||
|
|
d91e35a50a | ||
|
|
5219edd715 | ||
|
|
730eb1dad7 | ||
|
|
d29f9e9709 | ||
|
|
5167d1577d | ||
|
|
4f8cd74061 | ||
|
|
0220862ba5 | ||
|
|
33a0597603 | ||
|
|
281a8bf259 |
2
externals/cubeb
vendored
2
externals/cubeb
vendored
Submodule externals/cubeb updated: 12b78c0edf...6f2420de8f
@@ -46,16 +46,18 @@ struct AudioRendererParameter {
|
||||
u32_le sample_rate;
|
||||
u32_le sample_count;
|
||||
u32_le mix_buffer_count;
|
||||
u32_le unknown_c;
|
||||
u32_le submix_count;
|
||||
u32_le voice_count;
|
||||
u32_le sink_count;
|
||||
u32_le effect_count;
|
||||
u32_le unknown_1c;
|
||||
u8 unknown_20;
|
||||
INSERT_PADDING_BYTES(3);
|
||||
u32_le performance_frame_count;
|
||||
u8 is_voice_drop_enabled;
|
||||
u8 unknown_21;
|
||||
u8 unknown_22;
|
||||
u8 execution_mode;
|
||||
u32_le splitter_count;
|
||||
u32_le unknown_2c;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32_le num_splitter_send_channels;
|
||||
u32_le unknown_30;
|
||||
u32_le revision;
|
||||
};
|
||||
static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
|
||||
|
||||
@@ -12,6 +12,10 @@
|
||||
#include "common/ring_buffer.h"
|
||||
#include "core/settings.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <objbase.h>
|
||||
#endif
|
||||
|
||||
namespace AudioCore {
|
||||
|
||||
class CubebSinkStream final : public SinkStream {
|
||||
@@ -108,6 +112,11 @@ private:
|
||||
};
|
||||
|
||||
CubebSink::CubebSink(std::string_view target_device_name) {
|
||||
// Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
|
||||
#ifdef _MSC_VER
|
||||
com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
|
||||
#endif
|
||||
|
||||
if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
|
||||
LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
|
||||
return;
|
||||
@@ -142,6 +151,12 @@ CubebSink::~CubebSink() {
|
||||
}
|
||||
|
||||
cubeb_destroy(ctx);
|
||||
|
||||
#ifdef _MSC_VER
|
||||
if (SUCCEEDED(com_init_result)) {
|
||||
CoUninitialize();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
|
||||
|
||||
@@ -25,6 +25,10 @@ private:
|
||||
cubeb* ctx{};
|
||||
cubeb_devid output_device{};
|
||||
std::vector<SinkStreamPtr> sink_streams;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
u32 com_init_result = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
std::vector<std::string> ListCubebSinkDevices();
|
||||
|
||||
@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
|
||||
/**
|
||||
* Decode a color stored in RGBA8 format
|
||||
* @param bytes Pointer to encoded source color
|
||||
* @return Result color decoded as Math::Vec4<u8>
|
||||
* @return Result color decoded as Common::Vec4<u8>
|
||||
*/
|
||||
inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) {
|
||||
inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
|
||||
return {bytes[3], bytes[2], bytes[1], bytes[0]};
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a color stored in RGB8 format
|
||||
* @param bytes Pointer to encoded source color
|
||||
* @return Result color decoded as Math::Vec4<u8>
|
||||
* @return Result color decoded as Common::Vec4<u8>
|
||||
*/
|
||||
inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) {
|
||||
inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
|
||||
return {bytes[2], bytes[1], bytes[0], 255};
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a color stored in RG8 (aka HILO8) format
|
||||
* @param bytes Pointer to encoded source color
|
||||
* @return Result color decoded as Math::Vec4<u8>
|
||||
* @return Result color decoded as Common::Vec4<u8>
|
||||
*/
|
||||
inline Math::Vec4<u8> DecodeRG8(const u8* bytes) {
|
||||
inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
|
||||
return {bytes[1], bytes[0], 0, 255};
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a color stored in RGB565 format
|
||||
* @param bytes Pointer to encoded source color
|
||||
* @return Result color decoded as Math::Vec4<u8>
|
||||
* @return Result color decoded as Common::Vec4<u8>
|
||||
*/
|
||||
inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
|
||||
inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
|
||||
u16_le pixel;
|
||||
std::memcpy(&pixel, bytes, sizeof(pixel));
|
||||
return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
|
||||
@@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
|
||||
/**
|
||||
* Decode a color stored in RGB5A1 format
|
||||
* @param bytes Pointer to encoded source color
|
||||
* @return Result color decoded as Math::Vec4<u8>
|
||||
* @return Result color decoded as Common::Vec4<u8>
|
||||
*/
|
||||
inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
|
||||
inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
|
||||
u16_le pixel;
|
||||
std::memcpy(&pixel, bytes, sizeof(pixel));
|
||||
return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
|
||||
@@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
|
||||
/**
|
||||
* Decode a color stored in RGBA4 format
|
||||
* @param bytes Pointer to encoded source color
|
||||
* @return Result color decoded as Math::Vec4<u8>
|
||||
* @return Result color decoded as Common::Vec4<u8>
|
||||
*/
|
||||
inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
|
||||
inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
|
||||
u16_le pixel;
|
||||
std::memcpy(&pixel, bytes, sizeof(pixel));
|
||||
return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
|
||||
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
|
||||
/**
|
||||
* Decode a depth value and a stencil value stored in D24S8 format
|
||||
* @param bytes Pointer to encoded source values
|
||||
* @return Resulting values stored as a Math::Vec2
|
||||
* @return Resulting values stored as a Common::Vec2
|
||||
*/
|
||||
inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
|
||||
inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
|
||||
return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
|
||||
}
|
||||
|
||||
@@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
|
||||
* @param color Source color to encode
|
||||
* @param bytes Destination pointer to store encoded color
|
||||
*/
|
||||
inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
|
||||
inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
|
||||
bytes[3] = color.r();
|
||||
bytes[2] = color.g();
|
||||
bytes[1] = color.b();
|
||||
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
|
||||
* @param color Source color to encode
|
||||
* @param bytes Destination pointer to store encoded color
|
||||
*/
|
||||
inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
|
||||
inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
|
||||
bytes[2] = color.r();
|
||||
bytes[1] = color.g();
|
||||
bytes[0] = color.b();
|
||||
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
|
||||
* @param color Source color to encode
|
||||
* @param bytes Destination pointer to store encoded color
|
||||
*/
|
||||
inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
|
||||
inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
|
||||
bytes[1] = color.r();
|
||||
bytes[0] = color.g();
|
||||
}
|
||||
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
|
||||
* @param color Source color to encode
|
||||
* @param bytes Destination pointer to store encoded color
|
||||
*/
|
||||
inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
|
||||
inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
|
||||
const u16_le data =
|
||||
(Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
|
||||
|
||||
@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
|
||||
* @param color Source color to encode
|
||||
* @param bytes Destination pointer to store encoded color
|
||||
*/
|
||||
inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
|
||||
inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
|
||||
const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
|
||||
(Convert8To5(color.b()) << 1) | Convert8To1(color.a());
|
||||
|
||||
@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
|
||||
* @param color Source color to encode
|
||||
* @param bytes Destination pointer to store encoded color
|
||||
*/
|
||||
inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
|
||||
inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
|
||||
const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
|
||||
(Convert8To4(color.b()) << 4) | Convert8To4(color.a());
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#include <cstdlib>
|
||||
#include <type_traits>
|
||||
|
||||
namespace MathUtil {
|
||||
namespace Common {
|
||||
|
||||
constexpr float PI = 3.14159265f;
|
||||
|
||||
@@ -41,4 +41,4 @@ struct Rectangle {
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace MathUtil
|
||||
} // namespace Common
|
||||
|
||||
@@ -6,12 +6,12 @@
|
||||
|
||||
#include "common/vector_math.h"
|
||||
|
||||
namespace Math {
|
||||
namespace Common {
|
||||
|
||||
template <typename T>
|
||||
class Quaternion {
|
||||
public:
|
||||
Math::Vec3<T> xyz;
|
||||
Vec3<T> xyz;
|
||||
T w{};
|
||||
|
||||
Quaternion<decltype(-T{})> Inverse() const {
|
||||
@@ -38,12 +38,12 @@ public:
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) {
|
||||
auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
|
||||
return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
|
||||
}
|
||||
|
||||
inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) {
|
||||
inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
|
||||
return {axis * std::sin(angle / 2), std::cos(angle / 2)};
|
||||
}
|
||||
|
||||
} // namespace Math
|
||||
} // namespace Common
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
#include <cmath>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Math {
|
||||
namespace Common {
|
||||
|
||||
template <typename T>
|
||||
class Vec2;
|
||||
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
|
||||
return MakeVec(x, yzw[0], yzw[1], yzw[2]);
|
||||
}
|
||||
|
||||
} // namespace Math
|
||||
} // namespace Common
|
||||
|
||||
@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
|
||||
framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
|
||||
}
|
||||
|
||||
std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) {
|
||||
std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
|
||||
new_x = std::max(new_x, framebuffer_layout.screen.left);
|
||||
new_x = std::min(new_x, framebuffer_layout.screen.right - 1);
|
||||
|
||||
|
||||
@@ -166,7 +166,7 @@ private:
|
||||
/**
|
||||
* Clip the provided coordinates to be inside the touchscreen area.
|
||||
*/
|
||||
std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y);
|
||||
std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
|
||||
};
|
||||
|
||||
} // namespace Core::Frontend
|
||||
|
||||
@@ -12,12 +12,12 @@ namespace Layout {
|
||||
|
||||
// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
|
||||
template <class T>
|
||||
static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area,
|
||||
float screen_aspect_ratio) {
|
||||
static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
|
||||
float screen_aspect_ratio) {
|
||||
float scale = std::min(static_cast<float>(window_area.GetWidth()),
|
||||
window_area.GetHeight() / screen_aspect_ratio);
|
||||
return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
|
||||
static_cast<T>(std::round(scale * screen_aspect_ratio))};
|
||||
return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
|
||||
static_cast<T>(std::round(scale * screen_aspect_ratio))};
|
||||
}
|
||||
|
||||
FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
|
||||
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
|
||||
|
||||
const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
|
||||
ScreenUndocked::Width};
|
||||
MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height};
|
||||
MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio);
|
||||
Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
|
||||
Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
|
||||
|
||||
float window_aspect_ratio = static_cast<float>(height) / width;
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ struct FramebufferLayout {
|
||||
unsigned width{ScreenUndocked::Width};
|
||||
unsigned height{ScreenUndocked::Height};
|
||||
|
||||
MathUtil::Rectangle<unsigned> screen;
|
||||
Common::Rectangle<unsigned> screen;
|
||||
|
||||
/**
|
||||
* Returns the ration of pixel size of the screen, compared to the native size of the undocked
|
||||
|
||||
@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
|
||||
* Orientation is determined by right-hand rule.
|
||||
* Units: deg/sec
|
||||
*/
|
||||
using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>;
|
||||
using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;
|
||||
|
||||
/**
|
||||
* A touch device is an input device that returns a tuple of two floats and a bool. The floats are
|
||||
|
||||
@@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
|
||||
constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
|
||||
constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
|
||||
constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
|
||||
constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
|
||||
constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
|
||||
constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
|
||||
constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
|
||||
|
||||
@@ -14,32 +14,47 @@
|
||||
namespace Kernel {
|
||||
namespace {
|
||||
constexpr u16 GetSlot(Handle handle) {
|
||||
return handle >> 15;
|
||||
return static_cast<u16>(handle >> 15);
|
||||
}
|
||||
|
||||
constexpr u16 GetGeneration(Handle handle) {
|
||||
return handle & 0x7FFF;
|
||||
return static_cast<u16>(handle & 0x7FFF);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
HandleTable::HandleTable() {
|
||||
next_generation = 1;
|
||||
Clear();
|
||||
}
|
||||
|
||||
HandleTable::~HandleTable() = default;
|
||||
|
||||
ResultCode HandleTable::SetSize(s32 handle_table_size) {
|
||||
if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
|
||||
return ERR_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
// Values less than or equal to zero indicate to use the maximum allowable
|
||||
// size for the handle table in the actual kernel, so we ignore the given
|
||||
// value in that case, since we assume this by default unless this function
|
||||
// is called.
|
||||
if (handle_table_size > 0) {
|
||||
table_size = static_cast<u16>(handle_table_size);
|
||||
}
|
||||
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
|
||||
DEBUG_ASSERT(obj != nullptr);
|
||||
|
||||
u16 slot = next_free_slot;
|
||||
if (slot >= generations.size()) {
|
||||
const u16 slot = next_free_slot;
|
||||
if (slot >= table_size) {
|
||||
LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
|
||||
return ERR_HANDLE_TABLE_FULL;
|
||||
}
|
||||
next_free_slot = generations[slot];
|
||||
|
||||
u16 generation = next_generation++;
|
||||
const u16 generation = next_generation++;
|
||||
|
||||
// Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
|
||||
// Horizon OS uses zero to represent an invalid handle, so skip to 1.
|
||||
@@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
|
||||
}
|
||||
|
||||
ResultCode HandleTable::Close(Handle handle) {
|
||||
if (!IsValid(handle))
|
||||
if (!IsValid(handle)) {
|
||||
return ERR_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
u16 slot = GetSlot(handle);
|
||||
const u16 slot = GetSlot(handle);
|
||||
|
||||
objects[slot] = nullptr;
|
||||
|
||||
@@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) {
|
||||
}
|
||||
|
||||
bool HandleTable::IsValid(Handle handle) const {
|
||||
std::size_t slot = GetSlot(handle);
|
||||
u16 generation = GetGeneration(handle);
|
||||
const std::size_t slot = GetSlot(handle);
|
||||
const u16 generation = GetGeneration(handle);
|
||||
|
||||
return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation;
|
||||
return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
|
||||
}
|
||||
|
||||
SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
|
||||
@@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
|
||||
}
|
||||
|
||||
void HandleTable::Clear() {
|
||||
for (u16 i = 0; i < MAX_COUNT; ++i) {
|
||||
for (u16 i = 0; i < table_size; ++i) {
|
||||
generations[i] = i + 1;
|
||||
objects[i] = nullptr;
|
||||
}
|
||||
|
||||
@@ -49,6 +49,20 @@ public:
|
||||
HandleTable();
|
||||
~HandleTable();
|
||||
|
||||
/**
|
||||
* Sets the number of handles that may be in use at one time
|
||||
* for this handle table.
|
||||
*
|
||||
* @param handle_table_size The desired size to limit the handle table to.
|
||||
*
|
||||
* @returns an error code indicating if initialization was successful.
|
||||
* If initialization was not successful, then ERR_OUT_OF_MEMORY
|
||||
* will be returned.
|
||||
*
|
||||
* @pre handle_table_size must be within the range [0, 1024]
|
||||
*/
|
||||
ResultCode SetSize(s32 handle_table_size);
|
||||
|
||||
/**
|
||||
* Allocates a handle for the given object.
|
||||
* @return The created Handle or one of the following errors:
|
||||
@@ -103,14 +117,21 @@ private:
|
||||
*/
|
||||
std::array<u16, MAX_COUNT> generations;
|
||||
|
||||
/**
|
||||
* The limited size of the handle table. This can be specified by process
|
||||
* capabilities in order to restrict the overall number of handles that
|
||||
* can be created in a process instance
|
||||
*/
|
||||
u16 table_size = static_cast<u16>(MAX_COUNT);
|
||||
|
||||
/**
|
||||
* Global counter of the number of created handles. Stored in `generations` when a handle is
|
||||
* created, and wraps around to 1 when it hits 0x8000.
|
||||
*/
|
||||
u16 next_generation;
|
||||
u16 next_generation = 1;
|
||||
|
||||
/// Head of the free slots linked list.
|
||||
u16 next_free_slot;
|
||||
u16 next_free_slot = 0;
|
||||
};
|
||||
|
||||
} // namespace Kernel
|
||||
|
||||
@@ -99,7 +99,13 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
|
||||
vm_manager.Reset(metadata.GetAddressSpaceType());
|
||||
|
||||
const auto& caps = metadata.GetKernelCapabilities();
|
||||
return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
|
||||
const auto capability_init_result =
|
||||
capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
|
||||
if (capability_init_result.IsError()) {
|
||||
return capability_init_result;
|
||||
}
|
||||
|
||||
return handle_table.SetSize(capabilities.GetHandleTableSize());
|
||||
}
|
||||
|
||||
void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
|
||||
|
||||
@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
|
||||
interrupt_capabilities.set();
|
||||
|
||||
// Allow using the maximum possible amount of handles
|
||||
handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT);
|
||||
handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);
|
||||
|
||||
// Allow all debugging capabilities.
|
||||
is_debuggable = true;
|
||||
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
|
||||
return ERR_RESERVED_VALUE;
|
||||
}
|
||||
|
||||
handle_table_size = (flags >> 16) & 0x3FF;
|
||||
handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -156,7 +156,7 @@ public:
|
||||
}
|
||||
|
||||
/// Gets the number of total allowable handles for the process' handle table.
|
||||
u32 GetHandleTableSize() const {
|
||||
s32 GetHandleTableSize() const {
|
||||
return handle_table_size;
|
||||
}
|
||||
|
||||
@@ -252,7 +252,7 @@ private:
|
||||
u64 core_mask = 0;
|
||||
u64 priority_mask = 0;
|
||||
|
||||
u32 handle_table_size = 0;
|
||||
s32 handle_table_size = 0;
|
||||
u32 kernel_version = 0;
|
||||
|
||||
ProgramType program_type = ProgramType::SysModule;
|
||||
|
||||
@@ -262,20 +262,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Service_Audio, "called");
|
||||
|
||||
u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
|
||||
buffer_sz += params.unknown_c * 1024;
|
||||
buffer_sz += 0x940 * (params.unknown_c + 1);
|
||||
buffer_sz += params.submix_count * 1024;
|
||||
buffer_sz += 0x940 * (params.submix_count + 1);
|
||||
buffer_sz += 0x3F0 * params.voice_count;
|
||||
buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10);
|
||||
buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
|
||||
buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
|
||||
buffer_sz +=
|
||||
Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) *
|
||||
(params.mix_buffer_count + 6),
|
||||
0x40);
|
||||
buffer_sz += Common::AlignUp(
|
||||
(0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
|
||||
(params.mix_buffer_count + 6),
|
||||
0x40);
|
||||
|
||||
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
|
||||
u32 count = params.unknown_c + 1;
|
||||
const u32 count = params.submix_count + 1;
|
||||
u64 node_count = Common::AlignUp(count, 0x40);
|
||||
u64 node_state_buffer_sz =
|
||||
const u64 node_state_buffer_sz =
|
||||
4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
|
||||
u64 edge_matrix_buffer_sz = 0;
|
||||
node_count = Common::AlignUp(count * count, 0x40);
|
||||
@@ -289,19 +289,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
|
||||
|
||||
buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
|
||||
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
|
||||
buffer_sz += 0xE0 * params.unknown_2c;
|
||||
buffer_sz += 0xE0 * params.num_splitter_send_channels;
|
||||
buffer_sz += 0x20 * params.splitter_count;
|
||||
buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10);
|
||||
buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
|
||||
}
|
||||
buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
|
||||
u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
|
||||
((params.voice_count * 256) | 0x40);
|
||||
|
||||
if (params.unknown_1c >= 1) {
|
||||
if (params.performance_frame_count >= 1) {
|
||||
output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
|
||||
16 * params.voice_count + 16) +
|
||||
0x658) *
|
||||
(params.unknown_1c + 1) +
|
||||
(params.performance_frame_count + 1) +
|
||||
0xc0,
|
||||
0x40) +
|
||||
output_sz;
|
||||
|
||||
@@ -15,7 +15,7 @@ namespace Kernel {
|
||||
class SharedMemory;
|
||||
}
|
||||
|
||||
namespace SM {
|
||||
namespace Service::SM {
|
||||
class ServiceManager;
|
||||
}
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector
|
||||
|
||||
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
|
||||
u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
|
||||
const MathUtil::Rectangle<int>& crop_rect) {
|
||||
const Common::Rectangle<int>& crop_rect) {
|
||||
VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
|
||||
LOG_TRACE(Service,
|
||||
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
|
||||
|
||||
@@ -25,7 +25,7 @@ public:
|
||||
/// Performs a screen flip, drawing the buffer pointed to by the handle.
|
||||
void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
|
||||
NVFlinger::BufferQueue::BufferTransformFlags transform,
|
||||
const MathUtil::Rectangle<int>& crop_rect);
|
||||
const Common::Rectangle<int>& crop_rect);
|
||||
|
||||
private:
|
||||
std::shared_ptr<nvmap> nvmap_dev;
|
||||
|
||||
@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
|
||||
}
|
||||
|
||||
void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
|
||||
const MathUtil::Rectangle<int>& crop_rect) {
|
||||
const Common::Rectangle<int>& crop_rect) {
|
||||
auto itr = std::find_if(queue.begin(), queue.end(),
|
||||
[&](const Buffer& buffer) { return buffer.slot == slot; });
|
||||
ASSERT(itr != queue.end());
|
||||
|
||||
@@ -67,14 +67,14 @@ public:
|
||||
Status status = Status::Free;
|
||||
IGBPBuffer igbp_buffer;
|
||||
BufferTransformFlags transform;
|
||||
MathUtil::Rectangle<int> crop_rect;
|
||||
Common::Rectangle<int> crop_rect;
|
||||
};
|
||||
|
||||
void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
|
||||
std::optional<u32> DequeueBuffer(u32 width, u32 height);
|
||||
const IGBPBuffer& RequestBuffer(u32 slot) const;
|
||||
void QueueBuffer(u32 slot, BufferTransformFlags transform,
|
||||
const MathUtil::Rectangle<int>& crop_rect);
|
||||
const Common::Rectangle<int>& crop_rect);
|
||||
std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
|
||||
void ReleaseBuffer(u32 slot);
|
||||
u32 Query(QueryType type);
|
||||
|
||||
@@ -420,7 +420,7 @@ public:
|
||||
u32_le fence_is_valid;
|
||||
std::array<Fence, 2> fences;
|
||||
|
||||
MathUtil::Rectangle<int> GetCropRect() const {
|
||||
Common::Rectangle<int> GetCropRect() const {
|
||||
return {crop_left, crop_top, crop_right, crop_bottom};
|
||||
}
|
||||
};
|
||||
|
||||
@@ -71,15 +71,20 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
|
||||
FlushMode::FlushAndInvalidate);
|
||||
|
||||
VAddr end = base + size;
|
||||
while (base != end) {
|
||||
ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base);
|
||||
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
|
||||
base + page_table.pointers.size());
|
||||
|
||||
page_table.attributes[base] = type;
|
||||
page_table.pointers[base] = memory;
|
||||
std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
|
||||
|
||||
base += 1;
|
||||
if (memory != nullptr)
|
||||
if (memory == nullptr) {
|
||||
std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
|
||||
} else {
|
||||
while (base != end) {
|
||||
page_table.pointers[base] = memory;
|
||||
|
||||
base += 1;
|
||||
memory += PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -32,12 +32,12 @@ public:
|
||||
}
|
||||
|
||||
void BeginTilt(int x, int y) {
|
||||
mouse_origin = Math::MakeVec(x, y);
|
||||
mouse_origin = Common::MakeVec(x, y);
|
||||
is_tilting = true;
|
||||
}
|
||||
|
||||
void Tilt(int x, int y) {
|
||||
auto mouse_move = Math::MakeVec(x, y) - mouse_origin;
|
||||
auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
|
||||
if (is_tilting) {
|
||||
std::lock_guard<std::mutex> guard(tilt_mutex);
|
||||
if (mouse_move.x == 0 && mouse_move.y == 0) {
|
||||
@@ -45,7 +45,7 @@ public:
|
||||
} else {
|
||||
tilt_direction = mouse_move.Cast<float>();
|
||||
tilt_angle =
|
||||
std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f);
|
||||
std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -56,7 +56,7 @@ public:
|
||||
is_tilting = false;
|
||||
}
|
||||
|
||||
std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() {
|
||||
std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
|
||||
std::lock_guard<std::mutex> guard(status_mutex);
|
||||
return status;
|
||||
}
|
||||
@@ -66,17 +66,17 @@ private:
|
||||
const std::chrono::steady_clock::duration update_duration;
|
||||
const float sensitivity;
|
||||
|
||||
Math::Vec2<int> mouse_origin;
|
||||
Common::Vec2<int> mouse_origin;
|
||||
|
||||
std::mutex tilt_mutex;
|
||||
Math::Vec2<float> tilt_direction;
|
||||
Common::Vec2<float> tilt_direction;
|
||||
float tilt_angle = 0;
|
||||
|
||||
bool is_tilting = false;
|
||||
|
||||
Common::Event shutdown_event;
|
||||
|
||||
std::tuple<Math::Vec3<float>, Math::Vec3<float>> status;
|
||||
std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
|
||||
std::mutex status_mutex;
|
||||
|
||||
// Note: always keep the thread declaration at the end so that other objects are initialized
|
||||
@@ -85,8 +85,8 @@ private:
|
||||
|
||||
void MotionEmuThread() {
|
||||
auto update_time = std::chrono::steady_clock::now();
|
||||
Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0);
|
||||
Math::Quaternion<float> old_q;
|
||||
Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
|
||||
Common::Quaternion<float> old_q;
|
||||
|
||||
while (!shutdown_event.WaitUntil(update_time)) {
|
||||
update_time += update_duration;
|
||||
@@ -96,18 +96,18 @@ private:
|
||||
std::lock_guard<std::mutex> guard(tilt_mutex);
|
||||
|
||||
// Find the quaternion describing current 3DS tilting
|
||||
q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x),
|
||||
tilt_angle);
|
||||
q = Common::MakeQuaternion(
|
||||
Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
|
||||
}
|
||||
|
||||
auto inv_q = q.Inverse();
|
||||
|
||||
// Set the gravity vector in world space
|
||||
auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f);
|
||||
auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);
|
||||
|
||||
// Find the angular rate vector in world space
|
||||
auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
|
||||
angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180;
|
||||
angular_rate *= 1000 / update_millisecond / Common::PI * 180;
|
||||
|
||||
// Transform the two vectors from world space to 3DS space
|
||||
gravity = QuaternionRotate(inv_q, gravity);
|
||||
@@ -131,7 +131,7 @@ public:
|
||||
device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
|
||||
}
|
||||
|
||||
std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override {
|
||||
std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
|
||||
return device->GetStatus();
|
||||
}
|
||||
|
||||
|
||||
@@ -104,6 +104,8 @@ add_library(video_core STATIC
|
||||
if (ENABLE_VULKAN)
|
||||
target_sources(video_core PRIVATE
|
||||
renderer_vulkan/declarations.h
|
||||
renderer_vulkan/vk_buffer_cache.cpp
|
||||
renderer_vulkan/vk_buffer_cache.h
|
||||
renderer_vulkan/vk_device.cpp
|
||||
renderer_vulkan/vk_device.h
|
||||
renderer_vulkan/vk_memory_manager.cpp
|
||||
@@ -111,7 +113,9 @@ if (ENABLE_VULKAN)
|
||||
renderer_vulkan/vk_resource_manager.cpp
|
||||
renderer_vulkan/vk_resource_manager.h
|
||||
renderer_vulkan/vk_scheduler.cpp
|
||||
renderer_vulkan/vk_scheduler.h)
|
||||
renderer_vulkan/vk_scheduler.h
|
||||
renderer_vulkan/vk_stream_buffer.cpp
|
||||
renderer_vulkan/vk_stream_buffer.h)
|
||||
|
||||
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
|
||||
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
|
||||
|
||||
@@ -44,10 +44,10 @@ void Fermi2D::HandleSurfaceCopy() {
|
||||
const u32 src_blit_y2{
|
||||
static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
|
||||
|
||||
const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
|
||||
const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
|
||||
regs.blit_dst_x + regs.blit_dst_width,
|
||||
regs.blit_dst_y + regs.blit_dst_height};
|
||||
const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
|
||||
const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
|
||||
regs.blit_dst_x + regs.blit_dst_width,
|
||||
regs.blit_dst_y + regs.blit_dst_height};
|
||||
|
||||
if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
|
||||
UNIMPLEMENTED();
|
||||
|
||||
@@ -107,21 +107,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
|
||||
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
|
||||
auto debug_context = system.GetGPUDebugContext();
|
||||
|
||||
const u32 method = method_call.method;
|
||||
|
||||
// It is an error to write to a register other than the current macro's ARG register before it
|
||||
// has finished execution.
|
||||
if (executing_macro != 0) {
|
||||
ASSERT(method_call.method == executing_macro + 1);
|
||||
ASSERT(method == executing_macro + 1);
|
||||
}
|
||||
|
||||
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
|
||||
// uploaded to the GPU during initialization.
|
||||
if (method_call.method >= MacroRegistersStart) {
|
||||
if (method >= MacroRegistersStart) {
|
||||
// We're trying to execute a macro
|
||||
if (executing_macro == 0) {
|
||||
// A macro call must begin by writing the macro method's register, not its argument.
|
||||
ASSERT_MSG((method_call.method % 2) == 0,
|
||||
ASSERT_MSG((method % 2) == 0,
|
||||
"Can't start macro execution by writing to the ARGS register");
|
||||
executing_macro = method_call.method;
|
||||
executing_macro = method;
|
||||
}
|
||||
|
||||
macro_params.push_back(method_call.argument);
|
||||
@@ -133,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid Maxwell3D register, increase the size of the Regs structure");
|
||||
|
||||
if (debug_context) {
|
||||
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
|
||||
}
|
||||
|
||||
if (regs.reg_array[method_call.method] != method_call.argument) {
|
||||
regs.reg_array[method_call.method] = method_call.argument;
|
||||
if (regs.reg_array[method] != method_call.argument) {
|
||||
regs.reg_array[method] = method_call.argument;
|
||||
// Color buffers
|
||||
constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
|
||||
constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
|
||||
if (method_call.method >= first_rt_reg &&
|
||||
method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
|
||||
const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt;
|
||||
dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index);
|
||||
if (method >= first_rt_reg &&
|
||||
method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
|
||||
const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
|
||||
dirty_flags.color_buffer.set(rt_index);
|
||||
}
|
||||
|
||||
// Zeta buffer
|
||||
constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
|
||||
if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) ||
|
||||
method_call.method == MAXWELL3D_REG_INDEX(zeta_width) ||
|
||||
method_call.method == MAXWELL3D_REG_INDEX(zeta_height) ||
|
||||
(method_call.method >= MAXWELL3D_REG_INDEX(zeta) &&
|
||||
method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
|
||||
if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
|
||||
method == MAXWELL3D_REG_INDEX(zeta_width) ||
|
||||
method == MAXWELL3D_REG_INDEX(zeta_height) ||
|
||||
(method >= MAXWELL3D_REG_INDEX(zeta) &&
|
||||
method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
|
||||
dirty_flags.zeta_buffer = true;
|
||||
}
|
||||
|
||||
// Shader
|
||||
constexpr u32 shader_registers_count =
|
||||
sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
|
||||
if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
|
||||
method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
|
||||
if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
|
||||
method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
|
||||
dirty_flags.shaders = true;
|
||||
}
|
||||
|
||||
// Vertex format
|
||||
if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
|
||||
method_call.method <
|
||||
MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
|
||||
if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
|
||||
method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
|
||||
dirty_flags.vertex_attrib_format = true;
|
||||
}
|
||||
|
||||
// Vertex buffer
|
||||
if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) &&
|
||||
method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
|
||||
dirty_flags.vertex_array |=
|
||||
1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
|
||||
} else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
|
||||
method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
|
||||
dirty_flags.vertex_array |=
|
||||
1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
|
||||
} else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
|
||||
method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
|
||||
dirty_flags.vertex_array |=
|
||||
1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
|
||||
if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
|
||||
method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
|
||||
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
|
||||
} else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
|
||||
method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
|
||||
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
|
||||
} else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
|
||||
method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
|
||||
dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
|
||||
}
|
||||
}
|
||||
|
||||
switch (method_call.method) {
|
||||
switch (method) {
|
||||
case MAXWELL3D_REG_INDEX(macros.data): {
|
||||
ProcessMacroUpload(method_call.argument);
|
||||
break;
|
||||
|
||||
@@ -5,8 +5,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
@@ -503,7 +505,7 @@ public:
|
||||
f32 translate_z;
|
||||
INSERT_PADDING_WORDS(2);
|
||||
|
||||
MathUtil::Rectangle<s32> GetRect() const {
|
||||
Common::Rectangle<s32> GetRect() const {
|
||||
return {
|
||||
GetX(), // left
|
||||
GetY() + GetHeight(), // top
|
||||
@@ -1094,19 +1096,18 @@ public:
|
||||
MemoryManager& memory_manager;
|
||||
|
||||
struct DirtyFlags {
|
||||
u8 color_buffer = 0xFF;
|
||||
bool zeta_buffer = true;
|
||||
|
||||
bool shaders = true;
|
||||
std::bitset<8> color_buffer{0xFF};
|
||||
std::bitset<32> vertex_array{0xFFFFFFFF};
|
||||
|
||||
bool vertex_attrib_format = true;
|
||||
u32 vertex_array = 0xFFFFFFFF;
|
||||
bool zeta_buffer = true;
|
||||
bool shaders = true;
|
||||
|
||||
void OnMemoryWrite() {
|
||||
color_buffer = 0xFF;
|
||||
zeta_buffer = true;
|
||||
shaders = true;
|
||||
vertex_array = 0xFFFFFFFF;
|
||||
color_buffer.set();
|
||||
vertex_array.set();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -100,7 +100,7 @@ struct FramebufferConfig {
|
||||
|
||||
using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
|
||||
TransformFlags transform_flags;
|
||||
MathUtil::Rectangle<int> crop_rect;
|
||||
Common::Rectangle<int> crop_rect;
|
||||
};
|
||||
|
||||
namespace Engines {
|
||||
|
||||
@@ -129,6 +129,15 @@ protected:
|
||||
return ++modified_ticks;
|
||||
}
|
||||
|
||||
/// Flushes the specified object, updating appropriate cache state as needed
|
||||
void FlushObject(const T& object) {
|
||||
if (!object->IsDirty()) {
|
||||
return;
|
||||
}
|
||||
object->Flush();
|
||||
object->MarkAsModified(false, *this);
|
||||
}
|
||||
|
||||
private:
|
||||
/// Returns a list of cached objects from the specified memory region, ordered by access time
|
||||
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
|
||||
@@ -154,15 +163,6 @@ private:
|
||||
return objects;
|
||||
}
|
||||
|
||||
/// Flushes the specified object, updating appropriate cache state as needed
|
||||
void FlushObject(const T& object) {
|
||||
if (!object->IsDirty()) {
|
||||
return;
|
||||
}
|
||||
object->Flush();
|
||||
object->MarkAsModified(false, *this);
|
||||
}
|
||||
|
||||
using ObjectSet = std::set<T>;
|
||||
using ObjectCache = std::unordered_map<VAddr, T>;
|
||||
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
|
||||
|
||||
@@ -47,8 +47,8 @@ public:
|
||||
/// Attempt to use a faster method to perform a surface copy
|
||||
virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
const MathUtil::Rectangle<u32>& src_rect,
|
||||
const MathUtil::Rectangle<u32>& dst_rect) {
|
||||
const Common::Rectangle<u32>& src_rect,
|
||||
const Common::Rectangle<u32>& dst_rect) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -102,8 +102,8 @@ struct FramebufferCacheKey {
|
||||
|
||||
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
|
||||
ScreenInfo& info)
|
||||
: res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
|
||||
buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
|
||||
: res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window},
|
||||
screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
|
||||
// Create sampler objects
|
||||
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
|
||||
texture_samplers[i].Create();
|
||||
@@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
|
||||
}
|
||||
|
||||
// Rebinding the VAO invalidates the vertex buffer bindings.
|
||||
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
|
||||
gpu.dirty_flags.vertex_array.set();
|
||||
|
||||
state.draw.vertex_array = vao_entry.handle;
|
||||
return vao_entry.handle;
|
||||
@@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
|
||||
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
||||
const auto& regs = gpu.regs;
|
||||
|
||||
if (!gpu.dirty_flags.vertex_array)
|
||||
if (gpu.dirty_flags.vertex_array.none())
|
||||
return;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_VB);
|
||||
|
||||
// Upload all guest vertex arrays sequentially to our buffer
|
||||
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
if (~gpu.dirty_flags.vertex_array & (1u << index))
|
||||
if (!gpu.dirty_flags.vertex_array[index])
|
||||
continue;
|
||||
|
||||
const auto& vertex_array = regs.vertex_array[index];
|
||||
@@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
|
||||
}
|
||||
}
|
||||
|
||||
gpu.dirty_flags.vertex_array = 0;
|
||||
gpu.dirty_flags.vertex_array.reset();
|
||||
}
|
||||
|
||||
DrawParameters RasterizerOpenGL::SetupDraw() {
|
||||
@@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
|
||||
OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
|
||||
std::optional<std::size_t> single_color_target) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
|
||||
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
||||
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
||||
const auto& regs = gpu.regs;
|
||||
|
||||
const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
|
||||
single_color_target};
|
||||
if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 &&
|
||||
!gpu.dirty_flags.zeta_buffer) {
|
||||
if (fb_config_state == current_framebuffer_config_state &&
|
||||
gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
|
||||
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
|
||||
// single color targets). This is done because the guest registers may not change but the
|
||||
// host framebuffer may contain different attachments
|
||||
@@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() {
|
||||
// Add space for at least 18 constant buffers
|
||||
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
|
||||
|
||||
bool invalidate = buffer_cache.Map(buffer_size);
|
||||
const bool invalidate = buffer_cache.Map(buffer_size);
|
||||
if (invalidate) {
|
||||
// As all cached buffers are invalidated, we need to recheck their state.
|
||||
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
|
||||
gpu.dirty_flags.vertex_array.set();
|
||||
}
|
||||
|
||||
const GLuint vao = SetupVertexFormat();
|
||||
@@ -738,9 +738,13 @@ void RasterizerOpenGL::DrawArrays() {
|
||||
shader_program_manager->ApplyTo(state);
|
||||
state.Apply();
|
||||
|
||||
res_cache.SignalPreDrawCall();
|
||||
|
||||
// Execute draw call
|
||||
params.DispatchDraw();
|
||||
|
||||
res_cache.SignalPostDrawCall();
|
||||
|
||||
// Disable scissor test
|
||||
state.viewports[0].scissor.enabled = false;
|
||||
|
||||
@@ -779,8 +783,8 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
|
||||
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
const MathUtil::Rectangle<u32>& src_rect,
|
||||
const MathUtil::Rectangle<u32>& dst_rect) {
|
||||
const Common::Rectangle<u32>& src_rect,
|
||||
const Common::Rectangle<u32>& dst_rect) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||
res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
|
||||
return true;
|
||||
@@ -1034,7 +1038,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
|
||||
for (std::size_t i = 0; i < viewport_count; i++) {
|
||||
auto& viewport = current_state.viewports[i];
|
||||
const auto& src = regs.viewports[i];
|
||||
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
|
||||
const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
|
||||
viewport.x = viewport_rect.left;
|
||||
viewport.y = viewport_rect.bottom;
|
||||
viewport.width = viewport_rect.GetWidth();
|
||||
|
||||
@@ -62,8 +62,8 @@ public:
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
const MathUtil::Rectangle<u32>& src_rect,
|
||||
const MathUtil::Rectangle<u32>& dst_rect) override;
|
||||
const Common::Rectangle<u32>& src_rect,
|
||||
const Common::Rectangle<u32>& dst_rect) override;
|
||||
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
||||
u32 pixel_stride) override;
|
||||
bool AccelerateDrawBatch(bool is_indexed) override;
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <optional>
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
@@ -399,7 +400,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
|
||||
return format;
|
||||
}
|
||||
|
||||
MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
|
||||
Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
|
||||
u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
|
||||
if (IsPixelFormatASTC(pixel_format)) {
|
||||
// ASTC formats must stop at the ATSC block size boundary
|
||||
@@ -549,6 +550,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
|
||||
// alternatives. This signals a bug on those functions.
|
||||
const auto width = static_cast<GLsizei>(params.MipWidth(0));
|
||||
const auto height = static_cast<GLsizei>(params.MipHeight(0));
|
||||
memory_size = params.MemorySize();
|
||||
reinterpreted = false;
|
||||
|
||||
const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
|
||||
gl_internal_format = format_tuple.internal_format;
|
||||
@@ -962,30 +965,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
|
||||
auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
|
||||
const auto& regs{gpu.regs};
|
||||
|
||||
if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) {
|
||||
if (!gpu.dirty_flags.color_buffer[index]) {
|
||||
return last_color_buffers[index];
|
||||
}
|
||||
gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index));
|
||||
gpu.dirty_flags.color_buffer.reset(index);
|
||||
|
||||
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
|
||||
|
||||
if (index >= regs.rt_control.count) {
|
||||
return last_color_buffers[index] = {};
|
||||
return current_color_buffers[index] = {};
|
||||
}
|
||||
|
||||
if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
|
||||
return last_color_buffers[index] = {};
|
||||
return current_color_buffers[index] = {};
|
||||
}
|
||||
|
||||
const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
|
||||
|
||||
return last_color_buffers[index] = GetSurface(color_params, preserve_contents);
|
||||
return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
|
||||
}
|
||||
|
||||
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
|
||||
surface->LoadGLBuffer();
|
||||
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
|
||||
surface->MarkAsModified(false, *this);
|
||||
surface->MarkForReload(false);
|
||||
}
|
||||
|
||||
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
|
||||
@@ -997,18 +1001,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
|
||||
Surface surface{TryGet(params.addr)};
|
||||
if (surface) {
|
||||
if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
|
||||
// Use the cached surface as-is
|
||||
// Use the cached surface as-is unless it's not synced with memory
|
||||
if (surface->MustReload())
|
||||
LoadSurface(surface);
|
||||
return surface;
|
||||
} else if (preserve_contents) {
|
||||
// If surface parameters changed and we care about keeping the previous data, recreate
|
||||
// the surface from the old one
|
||||
Surface new_surface{RecreateSurface(surface, params)};
|
||||
Unregister(surface);
|
||||
UnregisterSurface(surface);
|
||||
Register(new_surface);
|
||||
if (new_surface->IsUploaded()) {
|
||||
RegisterReinterpretSurface(new_surface);
|
||||
}
|
||||
return new_surface;
|
||||
} else {
|
||||
// Delete the old surface before creating a new one to prevent collisions.
|
||||
Unregister(surface);
|
||||
UnregisterSurface(surface);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1062,8 +1071,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
|
||||
}
|
||||
|
||||
static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
|
||||
const MathUtil::Rectangle<u32>& src_rect,
|
||||
const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
|
||||
const Common::Rectangle<u32>& src_rect,
|
||||
const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
|
||||
GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
|
||||
std::size_t cubemap_face = 0) {
|
||||
|
||||
@@ -1193,7 +1202,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
|
||||
void RasterizerCacheOpenGL::FermiCopySurface(
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
|
||||
const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) {
|
||||
const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
|
||||
|
||||
const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
|
||||
const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
|
||||
@@ -1290,4 +1299,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
|
||||
return {};
|
||||
}
|
||||
|
||||
static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
|
||||
u32 height) {
|
||||
for (u32 i = 0; i < params.max_mip_level; i++) {
|
||||
if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
|
||||
return {i};
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
|
||||
const std::size_t size = params.LayerMemorySize();
|
||||
VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
|
||||
for (u32 i = 0; i < params.depth; i++) {
|
||||
if (start == addr) {
|
||||
return {i};
|
||||
}
|
||||
start += size;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
|
||||
const Surface blitted_surface) {
|
||||
const auto& dst_params = blitted_surface->GetSurfaceParams();
|
||||
const auto& src_params = render_surface->GetSurfaceParams();
|
||||
const std::size_t src_memory_size = src_params.size_in_bytes;
|
||||
const std::optional<u32> level =
|
||||
TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
|
||||
if (level.has_value()) {
|
||||
if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
|
||||
src_params.height == dst_params.MipHeight(*level) &&
|
||||
src_params.block_height >= dst_params.MipBlockHeight(*level)) {
|
||||
const std::optional<u32> slot =
|
||||
TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
|
||||
if (slot.has_value()) {
|
||||
glCopyImageSubData(render_surface->Texture().handle,
|
||||
SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
|
||||
blitted_surface->Texture().handle,
|
||||
SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
|
||||
dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
|
||||
blitted_surface->MarkAsModified(true, cache);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
|
||||
const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
|
||||
const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
|
||||
if (bound2 > bound1)
|
||||
return true;
|
||||
const auto& dst_params = blitted_surface->GetSurfaceParams();
|
||||
const auto& src_params = render_surface->GetSurfaceParams();
|
||||
return (dst_params.component_type != src_params.component_type);
|
||||
}
|
||||
|
||||
static bool IsReinterpretInvalidSecond(const Surface render_surface,
|
||||
const Surface blitted_surface) {
|
||||
const auto& dst_params = blitted_surface->GetSurfaceParams();
|
||||
const auto& src_params = render_surface->GetSurfaceParams();
|
||||
return (dst_params.height > src_params.height && dst_params.width > src_params.width);
|
||||
}
|
||||
|
||||
bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
|
||||
Surface intersect) {
|
||||
if (IsReinterpretInvalid(triggering_surface, intersect)) {
|
||||
UnregisterSurface(intersect);
|
||||
return false;
|
||||
}
|
||||
if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
|
||||
if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
|
||||
UnregisterSurface(intersect);
|
||||
return false;
|
||||
}
|
||||
FlushObject(intersect);
|
||||
FlushObject(triggering_surface);
|
||||
intersect->MarkForReload(true);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void RasterizerCacheOpenGL::SignalPreDrawCall() {
|
||||
if (texception && GLAD_GL_ARB_texture_barrier) {
|
||||
glTextureBarrier();
|
||||
}
|
||||
texception = false;
|
||||
}
|
||||
|
||||
void RasterizerCacheOpenGL::SignalPostDrawCall() {
|
||||
for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
|
||||
if (current_color_buffers[i] != nullptr) {
|
||||
Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
|
||||
if (intersect != nullptr) {
|
||||
PartialReinterpretSurface(current_color_buffers[i], intersect);
|
||||
texception = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
@@ -28,12 +28,13 @@ namespace OpenGL {
|
||||
|
||||
class CachedSurface;
|
||||
using Surface = std::shared_ptr<CachedSurface>;
|
||||
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
|
||||
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
|
||||
|
||||
using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
|
||||
using SurfaceType = VideoCore::Surface::SurfaceType;
|
||||
using PixelFormat = VideoCore::Surface::PixelFormat;
|
||||
using ComponentType = VideoCore::Surface::ComponentType;
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct SurfaceParams {
|
||||
enum class SurfaceClass {
|
||||
@@ -71,7 +72,7 @@ struct SurfaceParams {
|
||||
}
|
||||
|
||||
/// Returns the rectangle corresponding to this surface
|
||||
MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const;
|
||||
Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
|
||||
|
||||
/// Returns the total size of this surface in bytes, adjusted for compression
|
||||
std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
|
||||
@@ -140,10 +141,18 @@ struct SurfaceParams {
|
||||
return offset;
|
||||
}
|
||||
|
||||
std::size_t GetMipmapSingleSize(u32 mip_level) const {
|
||||
return InnerMipmapMemorySize(mip_level, false, is_layered);
|
||||
}
|
||||
|
||||
u32 MipWidth(u32 mip_level) const {
|
||||
return std::max(1U, width >> mip_level);
|
||||
}
|
||||
|
||||
u32 MipWidthGobAligned(u32 mip_level) const {
|
||||
return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
|
||||
}
|
||||
|
||||
u32 MipHeight(u32 mip_level) const {
|
||||
return std::max(1U, height >> mip_level);
|
||||
}
|
||||
@@ -346,6 +355,10 @@ public:
|
||||
return cached_size_in_bytes;
|
||||
}
|
||||
|
||||
std::size_t GetMemorySize() const {
|
||||
return memory_size;
|
||||
}
|
||||
|
||||
void Flush() override {
|
||||
FlushGLBuffer();
|
||||
}
|
||||
@@ -395,6 +408,26 @@ public:
|
||||
Tegra::Texture::SwizzleSource swizzle_z,
|
||||
Tegra::Texture::SwizzleSource swizzle_w);
|
||||
|
||||
void MarkReinterpreted() {
|
||||
reinterpreted = true;
|
||||
}
|
||||
|
||||
bool IsReinterpreted() const {
|
||||
return reinterpreted;
|
||||
}
|
||||
|
||||
void MarkForReload(bool reload) {
|
||||
must_reload = reload;
|
||||
}
|
||||
|
||||
bool MustReload() const {
|
||||
return must_reload;
|
||||
}
|
||||
|
||||
bool IsUploaded() const {
|
||||
return params.identity == SurfaceParams::SurfaceClass::Uploaded;
|
||||
}
|
||||
|
||||
private:
|
||||
void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
|
||||
|
||||
@@ -408,6 +441,9 @@ private:
|
||||
GLenum gl_internal_format{};
|
||||
std::size_t cached_size_in_bytes{};
|
||||
std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
|
||||
std::size_t memory_size;
|
||||
bool reinterpreted = false;
|
||||
bool must_reload = false;
|
||||
};
|
||||
|
||||
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
|
||||
@@ -430,8 +466,11 @@ public:
|
||||
/// Copies the contents of one surface to another
|
||||
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
|
||||
const MathUtil::Rectangle<u32>& src_rect,
|
||||
const MathUtil::Rectangle<u32>& dst_rect);
|
||||
const Common::Rectangle<u32>& src_rect,
|
||||
const Common::Rectangle<u32>& dst_rect);
|
||||
|
||||
void SignalPreDrawCall();
|
||||
void SignalPostDrawCall();
|
||||
|
||||
private:
|
||||
void LoadSurface(const Surface& surface);
|
||||
@@ -449,6 +488,10 @@ private:
|
||||
/// Tries to get a reserved surface for the specified parameters
|
||||
Surface TryGetReservedSurface(const SurfaceParams& params);
|
||||
|
||||
// Partialy reinterpret a surface based on a triggering_surface that collides with it.
|
||||
// returns true if the reinterpret was successful, false in case it was not.
|
||||
bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
|
||||
|
||||
/// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
|
||||
void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
|
||||
void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
|
||||
@@ -465,12 +508,50 @@ private:
|
||||
OGLFramebuffer read_framebuffer;
|
||||
OGLFramebuffer draw_framebuffer;
|
||||
|
||||
bool texception = false;
|
||||
|
||||
/// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
|
||||
/// using the new format.
|
||||
OGLBuffer copy_pbo;
|
||||
|
||||
std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
|
||||
std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
|
||||
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
|
||||
Surface last_depth_buffer;
|
||||
|
||||
using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
|
||||
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
|
||||
|
||||
static auto GetReinterpretInterval(const Surface& object) {
|
||||
return SurfaceInterval::right_open(object->GetAddr() + 1,
|
||||
object->GetAddr() + object->GetMemorySize() - 1);
|
||||
}
|
||||
|
||||
// Reinterpreted surfaces are very fragil as the game may keep rendering into them.
|
||||
SurfaceIntervalCache reinterpreted_surfaces;
|
||||
|
||||
void RegisterReinterpretSurface(Surface reinterpret_surface) {
|
||||
auto interval = GetReinterpretInterval(reinterpret_surface);
|
||||
reinterpreted_surfaces.insert({interval, reinterpret_surface});
|
||||
reinterpret_surface->MarkReinterpreted();
|
||||
}
|
||||
|
||||
Surface CollideOnReinterpretedSurface(VAddr addr) const {
|
||||
const SurfaceInterval interval{addr};
|
||||
for (auto& pair :
|
||||
boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
|
||||
return pair.second;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Unregisters an object from the cache
|
||||
void UnregisterSurface(const Surface& object) {
|
||||
if (object->IsReinterpreted()) {
|
||||
auto interval = GetReinterpretInterval(object);
|
||||
reinterpreted_surfaces.erase(interval);
|
||||
}
|
||||
Unregister(object);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
#include <fmt/format.h>
|
||||
#include <lz4.h>
|
||||
|
||||
@@ -257,6 +257,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
|
||||
const Tegra::FramebufferConfig& framebuffer) {
|
||||
texture.width = framebuffer.width;
|
||||
texture.height = framebuffer.height;
|
||||
texture.pixel_format = framebuffer.pixel_format;
|
||||
|
||||
GLint internal_format;
|
||||
switch (framebuffer.pixel_format) {
|
||||
|
||||
@@ -39,7 +39,7 @@ struct TextureInfo {
|
||||
/// Structure used for storing information about the display target for the Switch screen
|
||||
struct ScreenInfo {
|
||||
GLuint display_texture;
|
||||
const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
|
||||
const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
|
||||
TextureInfo texture;
|
||||
};
|
||||
|
||||
@@ -102,7 +102,7 @@ private:
|
||||
|
||||
/// Used for transforming the framebuffer orientation
|
||||
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
|
||||
MathUtil::Rectangle<int> framebuffer_crop_rect;
|
||||
Common::Rectangle<int> framebuffer_crop_rect;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
116
src/video_core/renderer_vulkan/vk_buffer_cache.cpp
Normal file
116
src/video_core/renderer_vulkan/vk_buffer_cache.cpp
Normal file
@@ -0,0 +1,116 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "core/core.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/renderer_vulkan/declarations.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
|
||||
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
|
||||
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
|
||||
: RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} {
|
||||
const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
|
||||
vk::BufferUsageFlagBits::eIndexBuffer |
|
||||
vk::BufferUsageFlagBits::eUniformBuffer;
|
||||
const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
|
||||
vk::AccessFlagBits::eUniformRead;
|
||||
stream_buffer =
|
||||
std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
|
||||
vk::PipelineStageFlagBits::eAllCommands);
|
||||
buffer_handle = stream_buffer->GetBuffer();
|
||||
}
|
||||
|
||||
VKBufferCache::~VKBufferCache() = default;
|
||||
|
||||
u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
|
||||
bool cache) {
|
||||
const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
|
||||
ASSERT(cpu_addr);
|
||||
|
||||
// Cache management is a big overhead, so only cache entries with a given size.
|
||||
// TODO: Figure out which size is the best for given games.
|
||||
cache &= size >= 2048;
|
||||
|
||||
if (cache) {
|
||||
if (auto entry = TryGet(*cpu_addr); entry) {
|
||||
if (entry->size >= size && entry->alignment == alignment) {
|
||||
return entry->offset;
|
||||
}
|
||||
Unregister(entry);
|
||||
}
|
||||
}
|
||||
|
||||
AlignBuffer(alignment);
|
||||
const u64 uploaded_offset = buffer_offset;
|
||||
|
||||
Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
|
||||
|
||||
buffer_ptr += size;
|
||||
buffer_offset += size;
|
||||
|
||||
if (cache) {
|
||||
auto entry = std::make_shared<CachedBufferEntry>();
|
||||
entry->offset = uploaded_offset;
|
||||
entry->size = size;
|
||||
entry->alignment = alignment;
|
||||
entry->addr = *cpu_addr;
|
||||
Register(entry);
|
||||
}
|
||||
|
||||
return uploaded_offset;
|
||||
}
|
||||
|
||||
u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
|
||||
AlignBuffer(alignment);
|
||||
std::memcpy(buffer_ptr, raw_pointer, size);
|
||||
const u64 uploaded_offset = buffer_offset;
|
||||
|
||||
buffer_ptr += size;
|
||||
buffer_offset += size;
|
||||
return uploaded_offset;
|
||||
}
|
||||
|
||||
std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
|
||||
AlignBuffer(alignment);
|
||||
u8* const uploaded_ptr = buffer_ptr;
|
||||
const u64 uploaded_offset = buffer_offset;
|
||||
|
||||
buffer_ptr += size;
|
||||
buffer_offset += size;
|
||||
return {uploaded_ptr, uploaded_offset};
|
||||
}
|
||||
|
||||
void VKBufferCache::Reserve(std::size_t max_size) {
|
||||
bool invalidate;
|
||||
std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
|
||||
buffer_offset = buffer_offset_base;
|
||||
|
||||
if (invalidate) {
|
||||
InvalidateAll();
|
||||
}
|
||||
}
|
||||
|
||||
VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) {
|
||||
return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base);
|
||||
}
|
||||
|
||||
void VKBufferCache::AlignBuffer(std::size_t alignment) {
|
||||
// Align the offset, not the mapped pointer
|
||||
const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
|
||||
buffer_ptr += offset_aligned - buffer_offset;
|
||||
buffer_offset = offset_aligned;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
87
src/video_core/renderer_vulkan/vk_buffer_cache.h
Normal file
87
src/video_core/renderer_vulkan/vk_buffer_cache.h
Normal file
@@ -0,0 +1,87 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/renderer_vulkan/declarations.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class VKDevice;
|
||||
class VKFence;
|
||||
class VKMemoryManager;
|
||||
class VKStreamBuffer;
|
||||
|
||||
struct CachedBufferEntry final : public RasterizerCacheObject {
|
||||
VAddr GetAddr() const override {
|
||||
return addr;
|
||||
}
|
||||
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
return size;
|
||||
}
|
||||
|
||||
// We do not have to flush this cache as things in it are never modified by us.
|
||||
void Flush() override {}
|
||||
|
||||
VAddr addr;
|
||||
std::size_t size;
|
||||
u64 offset;
|
||||
std::size_t alignment;
|
||||
};
|
||||
|
||||
class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
|
||||
public:
|
||||
explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, VideoCore::RasterizerInterface& rasterizer,
|
||||
const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
VKScheduler& scheduler, u64 size);
|
||||
~VKBufferCache();
|
||||
|
||||
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
|
||||
/// allocated.
|
||||
u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
|
||||
bool cache = true);
|
||||
|
||||
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
|
||||
u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
|
||||
|
||||
/// Reserves memory to be used by host's CPU. Returns mapped address and offset.
|
||||
std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
|
||||
|
||||
/// Reserves a region of memory to be used in subsequent upload/reserve operations.
|
||||
void Reserve(std::size_t max_size);
|
||||
|
||||
/// Ensures that the set data is sent to the device.
|
||||
[[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx);
|
||||
|
||||
/// Returns the buffer cache handle.
|
||||
vk::Buffer GetBuffer() const {
|
||||
return buffer_handle;
|
||||
}
|
||||
|
||||
private:
|
||||
void AlignBuffer(std::size_t alignment);
|
||||
|
||||
Tegra::MemoryManager& tegra_memory_manager;
|
||||
|
||||
std::unique_ptr<VKStreamBuffer> stream_buffer;
|
||||
vk::Buffer buffer_handle;
|
||||
|
||||
u8* buffer_ptr = nullptr;
|
||||
u64 buffer_offset = 0;
|
||||
u64 buffer_offset_base = 0;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -238,7 +238,7 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
|
||||
|
||||
VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
|
||||
u8* data, u64 begin, u64 end)
|
||||
: allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {}
|
||||
: interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
|
||||
|
||||
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
|
||||
allocation->Free(this);
|
||||
|
||||
@@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) {
|
||||
protected_resources.push_back(resource);
|
||||
}
|
||||
|
||||
void VKFence::Unprotect(const VKResource* resource) {
|
||||
void VKFence::Unprotect(VKResource* resource) {
|
||||
const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
|
||||
if (it != protected_resources.end()) {
|
||||
protected_resources.erase(it);
|
||||
}
|
||||
ASSERT(it != protected_resources.end());
|
||||
|
||||
resource->OnFenceRemoval(this);
|
||||
protected_resources.erase(it);
|
||||
}
|
||||
|
||||
VKFenceWatch::VKFenceWatch() = default;
|
||||
@@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() {
|
||||
}
|
||||
|
||||
void VKFenceWatch::Wait() {
|
||||
if (!fence) {
|
||||
if (fence == nullptr) {
|
||||
return;
|
||||
}
|
||||
fence->Wait();
|
||||
fence->Unprotect(this);
|
||||
fence = nullptr;
|
||||
}
|
||||
|
||||
void VKFenceWatch::Watch(VKFence& new_fence) {
|
||||
|
||||
@@ -63,7 +63,7 @@ public:
|
||||
void Protect(VKResource* resource);
|
||||
|
||||
/// Removes protection for a resource.
|
||||
void Unprotect(const VKResource* resource);
|
||||
void Unprotect(VKResource* resource);
|
||||
|
||||
/// Retreives the fence.
|
||||
operator vk::Fence() const {
|
||||
|
||||
90
src/video_core/renderer_vulkan/vk_stream_buffer.cpp
Normal file
90
src/video_core/renderer_vulkan/vk_stream_buffer.cpp
Normal file
@@ -0,0 +1,90 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_vulkan/declarations.h"
|
||||
#include "video_core/renderer_vulkan/vk_device.h"
|
||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
|
||||
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
|
||||
|
||||
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
|
||||
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
|
||||
: device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
|
||||
pipeline_stage} {
|
||||
CreateBuffers(memory_manager, usage);
|
||||
ReserveWatches(WATCHES_INITIAL_RESERVE);
|
||||
}
|
||||
|
||||
VKStreamBuffer::~VKStreamBuffer() = default;
|
||||
|
||||
std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
|
||||
ASSERT(size <= buffer_size);
|
||||
mapped_size = size;
|
||||
|
||||
if (offset + size > buffer_size) {
|
||||
// The buffer would overflow, save the amount of used buffers, signal an invalidation and
|
||||
// reset the state.
|
||||
invalidation_mark = used_watches;
|
||||
used_watches = 0;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
|
||||
}
|
||||
|
||||
VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
|
||||
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
|
||||
|
||||
if (invalidation_mark) {
|
||||
// TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
|
||||
exctx = scheduler.Flush();
|
||||
std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
|
||||
[&](auto& resource) { resource->Wait(); });
|
||||
invalidation_mark = std::nullopt;
|
||||
}
|
||||
|
||||
if (used_watches + 1 >= watches.size()) {
|
||||
// Ensure that there are enough watches.
|
||||
ReserveWatches(WATCHES_RESERVE_CHUNK);
|
||||
}
|
||||
// Add a watch for this allocation.
|
||||
watches[used_watches++]->Watch(exctx.GetFence());
|
||||
|
||||
offset += size;
|
||||
|
||||
return exctx;
|
||||
}
|
||||
|
||||
void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
|
||||
const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
|
||||
nullptr);
|
||||
|
||||
const auto dev = device.GetLogical();
|
||||
const auto& dld = device.GetDispatchLoader();
|
||||
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
|
||||
commit = memory_manager.Commit(*buffer, true);
|
||||
mapped_pointer = commit->GetData();
|
||||
}
|
||||
|
||||
void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
|
||||
const std::size_t previous_size = watches.size();
|
||||
watches.resize(previous_size + grow_size);
|
||||
std::generate(watches.begin() + previous_size, watches.end(),
|
||||
[]() { return std::make_unique<VKFenceWatch>(); });
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
72
src/video_core/renderer_vulkan/vk_stream_buffer.h
Normal file
72
src/video_core/renderer_vulkan/vk_stream_buffer.h
Normal file
@@ -0,0 +1,72 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/declarations.h"
|
||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class VKDevice;
|
||||
class VKFence;
|
||||
class VKFenceWatch;
|
||||
class VKResourceManager;
|
||||
class VKScheduler;
|
||||
|
||||
class VKStreamBuffer {
|
||||
public:
|
||||
explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
|
||||
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
|
||||
~VKStreamBuffer();
|
||||
|
||||
/**
|
||||
* Reserves a region of memory from the stream buffer.
|
||||
* @param size Size to reserve.
|
||||
* @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
|
||||
* offset and a boolean that's true when buffer has been invalidated.
|
||||
*/
|
||||
std::tuple<u8*, u64, bool> Reserve(u64 size);
|
||||
|
||||
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
|
||||
[[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
|
||||
|
||||
vk::Buffer GetBuffer() const {
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Creates Vulkan buffer handles committing the required the required memory.
|
||||
void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
|
||||
|
||||
/// Increases the amount of watches available.
|
||||
void ReserveWatches(std::size_t grow_size);
|
||||
|
||||
const VKDevice& device; ///< Vulkan device manager.
|
||||
VKScheduler& scheduler; ///< Command scheduler.
|
||||
const u64 buffer_size; ///< Total size of the stream buffer.
|
||||
const vk::AccessFlags access; ///< Access usage of this stream buffer.
|
||||
const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
|
||||
|
||||
UniqueBuffer buffer; ///< Mapped buffer.
|
||||
VKMemoryCommit commit; ///< Memory commit.
|
||||
u8* mapped_pointer{}; ///< Pointer to the host visible commit
|
||||
|
||||
u64 offset{}; ///< Buffer iterator.
|
||||
u64 mapped_size{}; ///< Size reserved for the current copy.
|
||||
|
||||
std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
|
||||
std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
|
||||
std::optional<std::size_t>
|
||||
invalidation_mark{}; ///< Number of watches used in the current invalidation.
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -398,7 +398,7 @@ void GraphicsSurfaceWidget::OnUpdate() {
|
||||
|
||||
for (unsigned int y = 0; y < surface_height; ++y) {
|
||||
for (unsigned int x = 0; x < surface_width; ++x) {
|
||||
Math::Vec4<u8> color;
|
||||
Common::Vec4<u8> color;
|
||||
color[0] = texture_data[x + y * surface_width + 0];
|
||||
color[1] = texture_data[x + y * surface_width + 1];
|
||||
color[2] = texture_data[x + y * surface_width + 2];
|
||||
|
||||
Reference in New Issue
Block a user