Compare commits
2 Commits

| Author | SHA1 | Date |
|---|---|---|
|  | 5ca63d0675 |  |
|  | 48e6f77c03 |  |
@@ -73,6 +73,7 @@ set(HASH_FILES
     "${VIDEO_CORE}/shader/decode/integer_set.cpp"
     "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
     "${VIDEO_CORE}/shader/decode/memory.cpp"
+    "${VIDEO_CORE}/shader/decode/texture.cpp"
     "${VIDEO_CORE}/shader/decode/other.cpp"
     "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
     "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
@@ -7,7 +7,7 @@ yuzu is an experimental open-source emulator for the Nintendo Switch from the cr
 
 It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes.
 
-yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success.
+yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics.
 
 yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included.
Submodule externals/cubeb updated: 6f2420de8f...12b78c0edf
@@ -46,18 +46,16 @@ struct AudioRendererParameter {
     u32_le sample_rate;
     u32_le sample_count;
     u32_le mix_buffer_count;
-    u32_le submix_count;
+    u32_le unknown_c;
     u32_le voice_count;
     u32_le sink_count;
     u32_le effect_count;
-    u32_le performance_frame_count;
-    u8 is_voice_drop_enabled;
-    u8 unknown_21;
-    u8 unknown_22;
-    u8 execution_mode;
+    u32_le unknown_1c;
+    u8 unknown_20;
+    INSERT_PADDING_BYTES(3);
     u32_le splitter_count;
-    u32_le num_splitter_send_channels;
-    u32_le unknown_30;
+    u32_le unknown_2c;
+    INSERT_PADDING_WORDS(1);
     u32_le revision;
 };
 static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
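The static_assert pins the parameter block at 52 bytes, which is why every rename above keeps field widths identical. A minimal standalone sketch of reading such a packed block out of a raw IPC payload (struct abbreviated; names here are illustrative, not yuzu's):

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Abbreviated stand-in for AudioRendererParameter; yuzu's u32_le fixes the
// byte order, while plain uint32_t here assumes a little-endian host.
struct RendererParams {
    uint32_t sample_rate;
    uint32_t sample_count;
    uint32_t mix_buffer_count;
    uint32_t unknown_c;
    // ...remaining fields elided...
};

// memcpy from the payload avoids the alignment and strict-aliasing pitfalls
// of reinterpret_cast on a raw byte buffer. Caller guarantees that
// payload.size() >= sizeof(RendererParams).
RendererParams ParseParams(const std::vector<uint8_t>& payload) {
    RendererParams params{};
    std::memcpy(&params, payload.data(), sizeof(params));
    return params;
}
```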
@@ -12,10 +12,6 @@
 #include "common/ring_buffer.h"
 #include "core/settings.h"
 
-#ifdef _MSC_VER
-#include <objbase.h>
-#endif
-
 namespace AudioCore {
 
 class CubebSinkStream final : public SinkStream {
@@ -112,11 +108,6 @@ private:
 };
 
 CubebSink::CubebSink(std::string_view target_device_name) {
-    // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
-#ifdef _MSC_VER
-    com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
-#endif
-
     if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
         LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
         return;
@@ -151,12 +142,6 @@ CubebSink::~CubebSink() {
     }
 
     cubeb_destroy(ctx);
-
-#ifdef _MSC_VER
-    if (SUCCEEDED(com_init_result)) {
-        CoUninitialize();
-    }
-#endif
 }
 
 SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
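The Windows-only branches being removed exist because cubeb's WASAPI backend requires COM to be initialized on the thread that calls cubeb_init. A hedged sketch of the same init/uninit pairing as an RAII guard (not yuzu's actual code):

```cpp
#ifdef _MSC_VER
#include <objbase.h>

// CoInitializeEx returns S_OK or S_FALSE when COM is usable on this thread;
// either success must be balanced by exactly one CoUninitialize.
struct ScopedComInit {
    HRESULT result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
    ~ScopedComInit() {
        if (SUCCEEDED(result)) {
            CoUninitialize();
        }
    }
};
#endif
```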
@@ -25,10 +25,6 @@ private:
     cubeb* ctx{};
     cubeb_devid output_device{};
     std::vector<SinkStreamPtr> sink_streams;
-
-#ifdef _MSC_VER
-    u32 com_init_result = 0;
-#endif
 };
 
 std::vector<std::string> ListCubebSinkDevices();
@@ -47,6 +47,7 @@ add_custom_command(OUTPUT scm_rev.cpp
     "${VIDEO_CORE}/shader/decode/integer_set.cpp"
     "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
     "${VIDEO_CORE}/shader/decode/memory.cpp"
+    "${VIDEO_CORE}/shader/decode/texture.cpp"
     "${VIDEO_CORE}/shader/decode/other.cpp"
     "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
     "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
 /**
  * Decode a color stored in RGBA8 format
  * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
+ * @return Result color decoded as Math::Vec4<u8>
  */
-inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) {
     return {bytes[3], bytes[2], bytes[1], bytes[0]};
 }
 
 /**
  * Decode a color stored in RGB8 format
  * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
+ * @return Result color decoded as Math::Vec4<u8>
  */
-inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) {
     return {bytes[2], bytes[1], bytes[0], 255};
 }
 
 /**
  * Decode a color stored in RG8 (aka HILO8) format
  * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
+ * @return Result color decoded as Math::Vec4<u8>
  */
-inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRG8(const u8* bytes) {
     return {bytes[1], bytes[0], 0, 255};
 }
 
 /**
  * Decode a color stored in RGB565 format
  * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
+ * @return Result color decoded as Math::Vec4<u8>
 */
-inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
     u16_le pixel;
     std::memcpy(&pixel, bytes, sizeof(pixel));
     return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
 /**
  * Decode a color stored in RGB5A1 format
  * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
+ * @return Result color decoded as Math::Vec4<u8>
 */
-inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
     u16_le pixel;
     std::memcpy(&pixel, bytes, sizeof(pixel));
     return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
 /**
  * Decode a color stored in RGBA4 format
  * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
+ * @return Result color decoded as Math::Vec4<u8>
 */
-inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
     u16_le pixel;
     std::memcpy(&pixel, bytes, sizeof(pixel));
     return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
 /**
  * Decode a depth value and a stencil value stored in D24S8 format
  * @param bytes Pointer to encoded source values
- * @return Resulting values stored as a Common::Vec2
+ * @return Resulting values stored as a Math::Vec2
 */
-inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
+inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
     return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
 }
 
@@ -149,7 +149,7 @@ inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
     bytes[3] = color.r();
     bytes[2] = color.g();
     bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
     bytes[2] = color.r();
     bytes[1] = color.g();
     bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
     bytes[1] = color.r();
     bytes[0] = color.g();
 }
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
     const u16_le data =
         (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
 
@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
     const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
                         (Convert8To5(color.b()) << 1) | Convert8To1(color.a());
 
@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
     const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
                      (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
 
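These helpers changed only in namespace; the packing math is untouched. As a quick illustration of the RGB565 layout DecodeRGB565/EncodeRGB565 operate on (5 bits red, 6 green, 5 blue), a self-contained round trip using truncating shifts in place of the ConvertNToM helpers:

```cpp
#include <cstdint>
#include <cstring>

int main() {
    const uint8_t r = 200, g = 100, b = 50;
    // Pack: red in bits 15..11, green in 10..5, blue in 4..0.
    const uint16_t pixel =
        static_cast<uint16_t>(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));

    // Store and reload through a byte buffer, as the decoders above do.
    // (Host byte order here; yuzu's u16_le pins this to little-endian.)
    uint8_t bytes[2];
    std::memcpy(bytes, &pixel, sizeof(pixel));
    uint16_t decoded;
    std::memcpy(&decoded, bytes, sizeof(decoded));

    const unsigned r5 = (decoded >> 11) & 0x1F; // 0..31
    const unsigned g6 = (decoded >> 5) & 0x3F;  // 0..63
    const unsigned b5 = decoded & 0x1F;         // 0..31
    return (r5 == (r >> 3) && g6 == (g >> 2) && b5 == (b >> 3)) ? 0 : 1;
}
```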
@@ -7,7 +7,7 @@
 #include <cstdlib>
 #include <type_traits>
 
-namespace Common {
+namespace MathUtil {
 
 constexpr float PI = 3.14159265f;
 
@@ -41,4 +41,4 @@ struct Rectangle {
     }
 };
 
-} // namespace Common
+} // namespace MathUtil
@@ -6,12 +6,12 @@
 
 #include "common/vector_math.h"
 
-namespace Common {
+namespace Math {
 
 template <typename T>
 class Quaternion {
 public:
-    Vec3<T> xyz;
+    Math::Vec3<T> xyz;
     T w{};
 
     Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
 };
 
 template <typename T>
-auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
+auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) {
     return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
 }
 
-inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
+inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) {
     return {axis * std::sin(angle / 2), std::cos(angle / 2)};
 }
 
-} // namespace Common
+} // namespace Math
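QuaternionRotate implements the standard expansion of v' = q v q⁻¹ for unit quaternions, so MakeQuaternion must be fed a unit-length axis. A sketch of the expected behavior, assuming the post-change Math namespace and the repo's vector_math.h helpers:

```cpp
#include <cmath>

#include "common/quaternion.h"
#include "common/vector_math.h"

void RotationExample() {
    // Rotate +X by 90 degrees around the +Z axis; the result is ~(0, 1, 0).
    const auto axis = Math::MakeVec(0.0f, 0.0f, 1.0f); // already unit length
    const auto q = Math::MakeQuaternion(axis, 3.14159265f / 2.0f);
    const auto rotated = Math::QuaternionRotate(q, Math::MakeVec(1.0f, 0.0f, 0.0f));
    (void)rotated;
}
```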
@@ -33,7 +33,7 @@
 #include <cmath>
 #include <type_traits>
 
-namespace Common {
+namespace Math {
 
 template <typename T>
 class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
     return MakeVec(x, yzw[0], yzw[1], yzw[2]);
 }
 
-} // namespace Common
+} // namespace Math
@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
            framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
 }
 
-std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
+std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) {
     new_x = std::max(new_x, framebuffer_layout.screen.left);
     new_x = std::min(new_x, framebuffer_layout.screen.right - 1);
 
@@ -166,7 +166,7 @@ private:
     /**
     * Clip the provided coordinates to be inside the touchscreen area.
     */
-    std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
+    std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y);
 };
 
 } // namespace Core::Frontend
@@ -12,12 +12,12 @@ namespace Layout {
 
 // Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
 template <class T>
-static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
-                                         float screen_aspect_ratio) {
+static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area,
+                                           float screen_aspect_ratio) {
     float scale = std::min(static_cast<float>(window_area.GetWidth()),
                            window_area.GetHeight() / screen_aspect_ratio);
-    return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
-                                static_cast<T>(std::round(scale * screen_aspect_ratio))};
+    return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
+                                  static_cast<T>(std::round(scale * screen_aspect_ratio))};
 }
 
 FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
 
     const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
                                        ScreenUndocked::Width};
-    Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
-    Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
+    MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height};
+    MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio);
 
     float window_aspect_ratio = static_cast<float>(height) / width;
 
@@ -16,7 +16,7 @@ struct FramebufferLayout {
     unsigned width{ScreenUndocked::Width};
     unsigned height{ScreenUndocked::Height};
 
-    Common::Rectangle<unsigned> screen;
+    MathUtil::Rectangle<unsigned> screen;
 
     /**
     * Returns the ratio of pixel size of the screen, compared to the native size of the undocked
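The scale expression in maxRectangle picks whichever of window width or (height / aspect) binds first, then rounds both dimensions. A worked restatement of that arithmetic for two hypothetical window sizes:

```cpp
#include <algorithm>
#include <cassert>
#include <cmath>
#include <utility>

int main() {
    const float aspect = 720.0f / 1280.0f; // undocked screen, 0.5625
    const auto fit = [aspect](float w, float h) {
        const float scale = std::min(w, h / aspect);
        return std::make_pair(std::round(scale), std::round(scale * aspect));
    };
    assert(fit(1920.0f, 1080.0f) == std::make_pair(1920.0f, 1080.0f)); // exact fit
    assert(fit(1000.0f, 800.0f) == std::make_pair(1000.0f, 563.0f));   // letterboxed
    return 0;
}
```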
@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
 * Orientation is determined by right-hand rule.
 * Units: deg/sec
 */
-using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;
+using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>;
 
 /**
 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are
@@ -14,7 +14,6 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
 constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
 constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
 constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
-constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
 constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
 constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
 constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
@@ -14,47 +14,32 @@
 namespace Kernel {
 namespace {
 constexpr u16 GetSlot(Handle handle) {
-    return static_cast<u16>(handle >> 15);
+    return handle >> 15;
 }
 
 constexpr u16 GetGeneration(Handle handle) {
-    return static_cast<u16>(handle & 0x7FFF);
+    return handle & 0x7FFF;
 }
 } // Anonymous namespace
 
 HandleTable::HandleTable() {
+    next_generation = 1;
     Clear();
 }
 
 HandleTable::~HandleTable() = default;
 
-ResultCode HandleTable::SetSize(s32 handle_table_size) {
-    if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
-        return ERR_OUT_OF_MEMORY;
-    }
-
-    // Values less than or equal to zero indicate to use the maximum allowable
-    // size for the handle table in the actual kernel, so we ignore the given
-    // value in that case, since we assume this by default unless this function
-    // is called.
-    if (handle_table_size > 0) {
-        table_size = static_cast<u16>(handle_table_size);
-    }
-
-    return RESULT_SUCCESS;
-}
-
 ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
     DEBUG_ASSERT(obj != nullptr);
 
-    const u16 slot = next_free_slot;
-    if (slot >= table_size) {
+    u16 slot = next_free_slot;
+    if (slot >= generations.size()) {
         LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
         return ERR_HANDLE_TABLE_FULL;
     }
    next_free_slot = generations[slot];
 
-    const u16 generation = next_generation++;
+    u16 generation = next_generation++;
 
     // Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
     // Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -79,11 +64,10 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
 }
 
 ResultCode HandleTable::Close(Handle handle) {
-    if (!IsValid(handle)) {
+    if (!IsValid(handle))
         return ERR_INVALID_HANDLE;
-    }
 
-    const u16 slot = GetSlot(handle);
+    u16 slot = GetSlot(handle);
 
     objects[slot] = nullptr;
 
@@ -93,10 +77,10 @@ ResultCode HandleTable::Close(Handle handle) {
 }
 
 bool HandleTable::IsValid(Handle handle) const {
-    const std::size_t slot = GetSlot(handle);
-    const u16 generation = GetGeneration(handle);
+    std::size_t slot = GetSlot(handle);
+    u16 generation = GetGeneration(handle);
 
-    return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
+    return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation;
 }
 
 SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -113,7 +97,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
 }
 
 void HandleTable::Clear() {
-    for (u16 i = 0; i < table_size; ++i) {
+    for (u16 i = 0; i < MAX_COUNT; ++i) {
         generations[i] = i + 1;
         objects[i] = nullptr;
     }
@@ -49,20 +49,6 @@ public:
     HandleTable();
     ~HandleTable();
 
-    /**
-     * Sets the number of handles that may be in use at one time
-     * for this handle table.
-     *
-     * @param handle_table_size The desired size to limit the handle table to.
-     *
-     * @returns an error code indicating if initialization was successful.
-     *          If initialization was not successful, then ERR_OUT_OF_MEMORY
-     *          will be returned.
-     *
-     * @pre handle_table_size must be within the range [0, 1024]
-     */
-    ResultCode SetSize(s32 handle_table_size);
-
     /**
      * Allocates a handle for the given object.
      * @return The created Handle or one of the following errors:
@@ -117,21 +103,14 @@ private:
     */
     std::array<u16, MAX_COUNT> generations;
 
-    /**
-     * The limited size of the handle table. This can be specified by process
-     * capabilities in order to restrict the overall number of handles that
-     * can be created in a process instance
-     */
-    u16 table_size = static_cast<u16>(MAX_COUNT);
-
     /**
      * Global counter of the number of created handles. Stored in `generations` when a handle is
      * created, and wraps around to 1 when it hits 0x8000.
      */
-    u16 next_generation = 1;
+    u16 next_generation;
 
     /// Head of the free slots linked list.
-    u16 next_free_slot = 0;
+    u16 next_free_slot;
 };
 
 } // namespace Kernel
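GetSlot and GetGeneration fix the handle layout on both sides of this diff: the generation occupies the low 15 bits, the slot sits above it, and zero is reserved as the invalid handle. A small sketch of composing and decomposing a handle that way (MakeHandle is illustrative; the table's Create() builds handles equivalently):

```cpp
#include <cassert>
#include <cstdint>

using Handle = uint32_t;

constexpr Handle MakeHandle(uint16_t slot, uint16_t generation) {
    // Slot in the high bits, 15-bit generation below it.
    return (static_cast<Handle>(slot) << 15) | (generation & 0x7FFFu);
}

int main() {
    const Handle h = MakeHandle(/*slot=*/3, /*generation=*/1);
    assert((h >> 15) == 3);    // GetSlot
    assert((h & 0x7FFF) == 1); // GetGeneration
    assert(h != 0);            // zero is the invalid handle in Horizon OS
    return 0;
}
```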
@@ -99,13 +99,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
     vm_manager.Reset(metadata.GetAddressSpaceType());
 
     const auto& caps = metadata.GetKernelCapabilities();
-    const auto capability_init_result =
-        capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
-    if (capability_init_result.IsError()) {
-        return capability_init_result;
-    }
-
-    return handle_table.SetSize(capabilities.GetHandleTableSize());
+    return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
 }
 
 void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
     interrupt_capabilities.set();
 
     // Allow using the maximum possible amount of handles
-    handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);
+    handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT);
 
     // Allow all debugging capabilities.
     is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
         return ERR_RESERVED_VALUE;
     }
 
-    handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
+    handle_table_size = (flags >> 16) & 0x3FF;
     return RESULT_SUCCESS;
 }
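Both sides decode the handle-table capability identically: the size sits in bits 25..16 of the descriptor and is masked to 10 bits. A minimal decoding check with a hypothetical flags word:

```cpp
#include <cassert>
#include <cstdint>

int main() {
    // Hypothetical capability descriptor carrying a table size of 0x200.
    const uint32_t flags = 0x200u << 16;
    const uint32_t handle_table_size = (flags >> 16) & 0x3FF;
    assert(handle_table_size == 0x200);
    return 0;
}
```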
@@ -156,7 +156,7 @@ public:
     }
 
     /// Gets the number of total allowable handles for the process' handle table.
-    s32 GetHandleTableSize() const {
+    u32 GetHandleTableSize() const {
         return handle_table_size;
     }
 
@@ -252,7 +252,7 @@ private:
     u64 core_mask = 0;
     u64 priority_mask = 0;
 
-    s32 handle_table_size = 0;
+    u32 handle_table_size = 0;
     u32 kernel_version = 0;
 
     ProgramType program_type = ProgramType::SysModule;
@@ -37,7 +37,7 @@ public:
         {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"},
         {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"},
         {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"},
-        {11, &IAudioRenderer::ExecuteAudioRendererRendering, "ExecuteAudioRendererRendering"},
+        {11, nullptr, "ExecuteAudioRendererRendering"},
     };
     // clang-format on
     RegisterHandlers(functions);
@@ -138,17 +138,6 @@ private:
         rb.Push(rendering_time_limit_percent);
     }
 
-    void ExecuteAudioRendererRendering(Kernel::HLERequestContext& ctx) {
-        LOG_DEBUG(Service_Audio, "called");
-
-        // This service command currently only reports an unsupported operation
-        // error code, or aborts. Given that, we just always return an error
-        // code in this case.
-        IPC::ResponseBuilder rb{ctx, 2};
-        rb.Push(ResultCode{ErrorModule::Audio, 201});
-    }
-
     Kernel::EventPair system_event;
     std::unique_ptr<AudioCore::AudioRenderer> renderer;
     u32 rendering_time_limit_percent = 100;
@@ -246,7 +235,7 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") {
         {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
         {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
         {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"},
-        {3, &AudRenU::OpenAudioRendererAuto, "OpenAudioRendererAuto"},
+        {3, nullptr, "OpenAudioRendererAuto"},
         {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"},
     };
     // clang-format on
@@ -259,7 +248,12 @@ AudRenU::~AudRenU() = default;
 void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_Audio, "called");
 
-    OpenAudioRendererImpl(ctx);
+    IPC::RequestParser rp{ctx};
+    auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
+    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+
+    rb.Push(RESULT_SUCCESS);
+    rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params));
 }
 
 void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
@@ -268,20 +262,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_Audio, "called");
 
     u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
-    buffer_sz += params.submix_count * 1024;
-    buffer_sz += 0x940 * (params.submix_count + 1);
+    buffer_sz += params.unknown_c * 1024;
+    buffer_sz += 0x940 * (params.unknown_c + 1);
     buffer_sz += 0x3F0 * params.voice_count;
-    buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
+    buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10);
     buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
-    buffer_sz += Common::AlignUp(
-        (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
-            (params.mix_buffer_count + 6),
-        0x40);
+    buffer_sz +=
+        Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) *
+                            (params.mix_buffer_count + 6),
+                        0x40);
 
     if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-        const u32 count = params.submix_count + 1;
+        u32 count = params.unknown_c + 1;
         u64 node_count = Common::AlignUp(count, 0x40);
-        const u64 node_state_buffer_sz =
+        u64 node_state_buffer_sz =
             4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
         u64 edge_matrix_buffer_sz = 0;
         node_count = Common::AlignUp(count * count, 0x40);
@@ -295,19 +289,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
 
     buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
     if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-        buffer_sz += 0xE0 * params.num_splitter_send_channels;
+        buffer_sz += 0xE0 * params.unknown_2c;
         buffer_sz += 0x20 * params.splitter_count;
-        buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
+        buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10);
     }
     buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
     u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
                     ((params.voice_count * 256) | 0x40);
 
-    if (params.performance_frame_count >= 1) {
+    if (params.unknown_1c >= 1) {
         output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
                                       16 * params.voice_count + 16) +
                                      0x658) *
-                                        (params.performance_frame_count + 1) +
+                                        (params.unknown_1c + 1) +
                                     0xc0,
                                 0x40) +
                     output_sz;
@@ -331,12 +325,6 @@ void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
     rb.PushIpcInterface<Audio::IAudioDevice>();
 }
 
-void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
-    LOG_DEBUG(Service_Audio, "called");
-
-    OpenAudioRendererImpl(ctx);
-}
-
 void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
     LOG_WARNING(Service_Audio, "(STUBBED) called");
 
@@ -347,15 +335,6 @@ void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& c
     // based on the current revision
 }
 
-void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
-    IPC::RequestParser rp{ctx};
-    const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
-    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
-
-    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<IAudioRenderer>(params);
-}
-
 bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
     u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
     switch (feature) {
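IsFeatureSupported works by subtracting the baseline magic 'REV0' from the renderer's revision word; after the byte swap noted in the comment, the difference is the revision digit. A standalone sketch of that arithmetic (MakeMagic here mirrors Common::MakeMagic's little-endian packing):

```cpp
#include <cassert>
#include <cstdint>

constexpr uint32_t MakeMagic(char a, char b, char c, char d) {
    return static_cast<uint32_t>(a) | (static_cast<uint32_t>(b) << 8) |
           (static_cast<uint32_t>(c) << 16) | (static_cast<uint32_t>(d) << 24);
}

int main() {
    // A parameter block stamped "REV4" is four revisions past the baseline.
    const uint32_t revision = MakeMagic('R', 'E', 'V', '4');
    const uint32_t diff = revision - MakeMagic('R', 'E', 'V', '0');
    // The digit difference lands in the top byte; yuzu's u32_be assignment
    // performs the equivalent swap.
    assert((diff >> 24) == 4);
    return 0;
}
```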
@@ -21,11 +21,8 @@ private:
     void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
     void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
     void GetAudioDeviceService(Kernel::HLERequestContext& ctx);
-    void OpenAudioRendererAuto(Kernel::HLERequestContext& ctx);
     void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
 
-    void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
-
     enum class AudioFeatures : u32 {
         Splitter,
     };
@@ -15,7 +15,7 @@ namespace Kernel {
 class SharedMemory;
 }
 
-namespace Service::SM {
+namespace SM {
 class ServiceManager;
 }
 
@@ -23,7 +23,7 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector
 
 void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
                         u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
-                        const Common::Rectangle<int>& crop_rect) {
+                        const MathUtil::Rectangle<int>& crop_rect) {
     VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
     LOG_TRACE(Service,
               "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
@@ -25,7 +25,7 @@ public:
     /// Performs a screen flip, drawing the buffer pointed to by the handle.
     void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
               NVFlinger::BufferQueue::BufferTransformFlags transform,
-              const Common::Rectangle<int>& crop_rect);
+              const MathUtil::Rectangle<int>& crop_rect);
 
 private:
     std::shared_ptr<nvmap> nvmap_dev;
@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
 }
 
 void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
-                              const Common::Rectangle<int>& crop_rect) {
+                              const MathUtil::Rectangle<int>& crop_rect) {
     auto itr = std::find_if(queue.begin(), queue.end(),
                             [&](const Buffer& buffer) { return buffer.slot == slot; });
     ASSERT(itr != queue.end());
@@ -67,14 +67,14 @@ public:
         Status status = Status::Free;
         IGBPBuffer igbp_buffer;
         BufferTransformFlags transform;
-        Common::Rectangle<int> crop_rect;
+        MathUtil::Rectangle<int> crop_rect;
     };
 
     void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
     std::optional<u32> DequeueBuffer(u32 width, u32 height);
     const IGBPBuffer& RequestBuffer(u32 slot) const;
     void QueueBuffer(u32 slot, BufferTransformFlags transform,
-                     const Common::Rectangle<int>& crop_rect);
+                     const MathUtil::Rectangle<int>& crop_rect);
     std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
     void ReleaseBuffer(u32 slot);
     u32 Query(QueryType type);
@@ -420,7 +420,7 @@ public:
         u32_le fence_is_valid;
         std::array<Fence, 2> fences;
 
-        Common::Rectangle<int> GetCropRect() const {
+        MathUtil::Rectangle<int> GetCropRect() const {
            return {crop_left, crop_top, crop_right, crop_bottom};
         }
     };
@@ -71,20 +71,15 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
                      FlushMode::FlushAndInvalidate);
 
     VAddr end = base + size;
-    ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
-               base + page_table.pointers.size());
+    while (base != end) {
+        ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base);
 
-    std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
+        page_table.attributes[base] = type;
+        page_table.pointers[base] = memory;
 
-    if (memory == nullptr) {
-        std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
-    } else {
-        while (base != end) {
-            page_table.pointers[base] = memory;
-
-            base += 1;
-            memory += PAGE_SIZE;
-        }
+        base += 1;
+        if (memory != nullptr)
+            memory += PAGE_SIZE;
     }
 }
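Both variants map the same [base, base + size) page range; the loop form simply advances the host pointer one PAGE_SIZE per page and leaves null backing untouched. A reduced model of that loop over a toy page table:

```cpp
#include <array>
#include <cstddef>
#include <cstdint>

constexpr std::size_t PAGE_SIZE = 0x1000;

// Toy page table: one host pointer per guest page (sizes are hypothetical).
std::array<uint8_t*, 64> pointers{};

void MapPages(std::size_t base, std::size_t count, uint8_t* memory) {
    const std::size_t end = base + count;
    while (base != end) {
        pointers[base] = memory; // nullptr unmaps; non-null maps backing RAM
        base += 1;
        if (memory != nullptr) {
            memory += PAGE_SIZE; // step to the next page of host memory
        }
    }
}
```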
@@ -32,12 +32,12 @@ public:
     }
 
     void BeginTilt(int x, int y) {
-        mouse_origin = Common::MakeVec(x, y);
+        mouse_origin = Math::MakeVec(x, y);
         is_tilting = true;
     }
 
     void Tilt(int x, int y) {
-        auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
+        auto mouse_move = Math::MakeVec(x, y) - mouse_origin;
         if (is_tilting) {
             std::lock_guard<std::mutex> guard(tilt_mutex);
             if (mouse_move.x == 0 && mouse_move.y == 0) {
@@ -45,7 +45,7 @@ public:
             } else {
                 tilt_direction = mouse_move.Cast<float>();
                 tilt_angle =
-                    std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
+                    std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f);
             }
         }
     }
@@ -56,7 +56,7 @@ public:
         is_tilting = false;
     }
 
-    std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
+    std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() {
         std::lock_guard<std::mutex> guard(status_mutex);
         return status;
     }
@@ -66,17 +66,17 @@ private:
     const std::chrono::steady_clock::duration update_duration;
     const float sensitivity;
 
-    Common::Vec2<int> mouse_origin;
+    Math::Vec2<int> mouse_origin;
 
     std::mutex tilt_mutex;
-    Common::Vec2<float> tilt_direction;
+    Math::Vec2<float> tilt_direction;
     float tilt_angle = 0;
 
     bool is_tilting = false;
 
     Common::Event shutdown_event;
 
-    std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
+    std::tuple<Math::Vec3<float>, Math::Vec3<float>> status;
     std::mutex status_mutex;
 
     // Note: always keep the thread declaration at the end so that other objects are initialized
@@ -85,8 +85,8 @@ private:
 
     void MotionEmuThread() {
         auto update_time = std::chrono::steady_clock::now();
-        Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
-        Common::Quaternion<float> old_q;
+        Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0);
+        Math::Quaternion<float> old_q;
 
         while (!shutdown_event.WaitUntil(update_time)) {
             update_time += update_duration;
@@ -96,18 +96,18 @@ private:
             std::lock_guard<std::mutex> guard(tilt_mutex);
 
             // Find the quaternion describing current 3DS tilting
-            q = Common::MakeQuaternion(
-                Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
+            q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x),
+                               tilt_angle);
         }
 
         auto inv_q = q.Inverse();
 
         // Set the gravity vector in world space
-        auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);
+        auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f);
 
         // Find the angular rate vector in world space
         auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
-        angular_rate *= 1000 / update_millisecond / Common::PI * 180;
+        angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180;
 
         // Transform the two vectors from world space to 3DS space
         gravity = QuaternionRotate(inv_q, gravity);
@@ -131,7 +131,7 @@ public:
         device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
     }
 
-    std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
+    std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override {
         return device->GetStatus();
     }
@@ -74,6 +74,7 @@ add_library(video_core STATIC
     shader/decode/hfma2.cpp
     shader/decode/conversion.cpp
     shader/decode/memory.cpp
+    shader/decode/texture.cpp
     shader/decode/float_set_predicate.cpp
     shader/decode/integer_set_predicate.cpp
     shader/decode/half_set_predicate.cpp
@@ -104,8 +105,6 @@ add_library(video_core STATIC
 if (ENABLE_VULKAN)
     target_sources(video_core PRIVATE
         renderer_vulkan/declarations.h
-        renderer_vulkan/vk_buffer_cache.cpp
-        renderer_vulkan/vk_buffer_cache.h
         renderer_vulkan/vk_device.cpp
         renderer_vulkan/vk_device.h
         renderer_vulkan/vk_memory_manager.cpp
@@ -113,9 +112,7 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_resource_manager.cpp
         renderer_vulkan/vk_resource_manager.h
         renderer_vulkan/vk_scheduler.cpp
-        renderer_vulkan/vk_scheduler.h
-        renderer_vulkan/vk_stream_buffer.cpp
-        renderer_vulkan/vk_stream_buffer.h)
+        renderer_vulkan/vk_scheduler.h)
 
     target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
     target_compile_definitions(video_core PRIVATE HAS_VULKAN)
@@ -44,10 +44,10 @@ void Fermi2D::HandleSurfaceCopy() {
     const u32 src_blit_y2{
         static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
 
-    const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
-    const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
-                                          regs.blit_dst_x + regs.blit_dst_width,
-                                          regs.blit_dst_y + regs.blit_dst_height};
+    const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
+    const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
+                                            regs.blit_dst_x + regs.blit_dst_width,
+                                            regs.blit_dst_y + regs.blit_dst_height};
 
     if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
         UNIMPLEMENTED();
@@ -107,23 +107,21 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
 void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
     auto debug_context = system.GetGPUDebugContext();
 
-    const u32 method = method_call.method;
-
     // It is an error to write to a register other than the current macro's ARG register before it
     // has finished execution.
     if (executing_macro != 0) {
-        ASSERT(method == executing_macro + 1);
+        ASSERT(method_call.method == executing_macro + 1);
     }
 
     // Methods after 0xE00 are special, they're actually triggers for some microcode that was
     // uploaded to the GPU during initialization.
-    if (method >= MacroRegistersStart) {
+    if (method_call.method >= MacroRegistersStart) {
         // We're trying to execute a macro
         if (executing_macro == 0) {
             // A macro call must begin by writing the macro method's register, not its argument.
-            ASSERT_MSG((method % 2) == 0,
+            ASSERT_MSG((method_call.method % 2) == 0,
                        "Can't start macro execution by writing to the ARGS register");
-            executing_macro = method;
+            executing_macro = method_call.method;
         }
 
         macro_params.push_back(method_call.argument);
@@ -135,62 +133,66 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         return;
     }
 
-    ASSERT_MSG(method < Regs::NUM_REGS,
+    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
                "Invalid Maxwell3D register, increase the size of the Regs structure");
 
     if (debug_context) {
         debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
     }
 
-    if (regs.reg_array[method] != method_call.argument) {
-        regs.reg_array[method] = method_call.argument;
+    if (regs.reg_array[method_call.method] != method_call.argument) {
+        regs.reg_array[method_call.method] = method_call.argument;
         // Color buffers
         constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
         constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
-        if (method >= first_rt_reg &&
-            method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
-            const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
-            dirty_flags.color_buffer.set(rt_index);
+        if (method_call.method >= first_rt_reg &&
+            method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
+            const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt;
+            dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index);
         }
 
         // Zeta buffer
         constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
-        if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
-            method == MAXWELL3D_REG_INDEX(zeta_width) ||
-            method == MAXWELL3D_REG_INDEX(zeta_height) ||
-            (method >= MAXWELL3D_REG_INDEX(zeta) &&
-             method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
+        if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) ||
+            method_call.method == MAXWELL3D_REG_INDEX(zeta_width) ||
+            method_call.method == MAXWELL3D_REG_INDEX(zeta_height) ||
+            (method_call.method >= MAXWELL3D_REG_INDEX(zeta) &&
+             method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
             dirty_flags.zeta_buffer = true;
         }
 
         // Shader
         constexpr u32 shader_registers_count =
             sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
-        if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
-            method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
+        if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
+            method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
             dirty_flags.shaders = true;
         }
 
         // Vertex format
-        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
-            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
+        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
+            method_call.method <
+                MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
             dirty_flags.vertex_attrib_format = true;
         }
 
         // Vertex buffer
-        if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
-            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
-            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
-        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
-                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
-            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
-        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
-                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
-            dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
+        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) &&
+            method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
+            dirty_flags.vertex_array |=
+                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
+        } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
+                   method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
+            dirty_flags.vertex_array |=
+                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
+        } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
+                   method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
+            dirty_flags.vertex_array |=
+                1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
         }
     }
 
-    switch (method) {
+    switch (method_call.method) {
     case MAXWELL3D_REG_INDEX(macros.data): {
         ProcessMacroUpload(method_call.argument);
         break;
@@ -5,10 +5,8 @@
 #pragma once
 
 #include <array>
-#include <bitset>
 #include <unordered_map>
 #include <vector>
 
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
@@ -505,7 +503,7 @@ public:
        f32 translate_z;
         INSERT_PADDING_WORDS(2);
 
-        Common::Rectangle<s32> GetRect() const {
+        MathUtil::Rectangle<s32> GetRect() const {
             return {
                 GetX(),               // left
                 GetY() + GetHeight(), // top
@@ -1096,18 +1094,19 @@ public:
     MemoryManager& memory_manager;
 
     struct DirtyFlags {
-        std::bitset<8> color_buffer{0xFF};
-        std::bitset<32> vertex_array{0xFFFFFFFF};
-
-        bool vertex_attrib_format = true;
+        u8 color_buffer = 0xFF;
         bool zeta_buffer = true;
+
         bool shaders = true;
+
+        bool vertex_attrib_format = true;
+        u32 vertex_array = 0xFFFFFFFF;
 
         void OnMemoryWrite() {
+            color_buffer = 0xFF;
             zeta_buffer = true;
             shaders = true;
-            color_buffer.set();
-            vertex_array.set();
+            vertex_array = 0xFFFFFFFF;
         }
     };
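The two DirtyFlags representations are operationally equivalent; the hunks in this compare just trade std::bitset calls for manual mask arithmetic. A side-by-side sketch of the operations being swapped:

```cpp
#include <bitset>
#include <cstdint>

void DirtyFlagOps(std::size_t index) {
    // std::bitset form (one side of the diff)
    std::bitset<32> dirty_bits;
    dirty_bits.set(index);               // mark one entry dirty
    const bool any = !dirty_bits.none(); // anything dirty?
    dirty_bits.reset();                  // clear all

    // raw u32 mask form (the other side)
    std::uint32_t dirty_mask = 0;
    dirty_mask |= 1u << index;             // mark one entry dirty
    const bool any_mask = dirty_mask != 0; // anything dirty?
    dirty_mask = 0;                        // clear all

    (void)any;
    (void)any_mask;
}
```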
@@ -325,11 +325,11 @@ enum class TextureQueryType : u64 {
 
 enum class TextureProcessMode : u64 {
     None = 0,
-    LZ = 1,  // Unknown, appears to be the same as none.
+    LZ = 1,  // Load LOD of zero.
     LB = 2,  // Load Bias.
-    LL = 3,  // Load LOD (LevelOfDetail)
-    LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB
-    LLA = 7  // Load LOD. The A is unknown, does not appear to differ with LL
+    LL = 3,  // Load LOD.
+    LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
+    LLA = 7  // Load LOD. The A is unknown, does not appear to differ with LL.
 };
 
 enum class TextureMiscMode : u64 {
enum class TextureMiscMode : u64 {
|
||||
@@ -1446,6 +1446,7 @@ public:
|
||||
Flow,
|
||||
Synch,
|
||||
Memory,
|
||||
Texture,
|
||||
FloatSet,
|
||||
FloatSetPredicate,
|
||||
IntegerSet,
|
||||
@@ -1576,14 +1577,14 @@ private:
             INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
             INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
             INST("1110111011011---", Id::STG, Type::Memory, "STG"),
-            INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
-            INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"),
-            INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"),
-            INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
-            INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
-            INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
-            INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"),
-            INST("1101111101011---", Id::TMML, Type::Memory, "TMML"),
+            INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
+            INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
+            INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
+            INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
+            INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
+            INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
+            INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
+            INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
             INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
             INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
             INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
@@ -100,7 +100,7 @@ struct FramebufferConfig {
 
     using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
     TransformFlags transform_flags;
-    Common::Rectangle<int> crop_rect;
+    MathUtil::Rectangle<int> crop_rect;
 };
 
 namespace Engines {
@@ -129,15 +129,6 @@ protected:
         return ++modified_ticks;
     }
 
-    /// Flushes the specified object, updating appropriate cache state as needed
-    void FlushObject(const T& object) {
-        if (!object->IsDirty()) {
-            return;
-        }
-        object->Flush();
-        object->MarkAsModified(false, *this);
-    }
-
 private:
     /// Returns a list of cached objects from the specified memory region, ordered by access time
     std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
@@ -163,6 +154,15 @@ private:
         return objects;
     }
 
+    /// Flushes the specified object, updating appropriate cache state as needed
+    void FlushObject(const T& object) {
+        if (!object->IsDirty()) {
+            return;
+        }
+        object->Flush();
+        object->MarkAsModified(false, *this);
+    }
+
     using ObjectSet = std::set<T>;
     using ObjectCache = std::unordered_map<VAddr, T>;
     using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
@@ -47,8 +47,8 @@ public:
     /// Attempt to use a faster method to perform a surface copy
     virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                        const Tegra::Engines::Fermi2D::Regs::Surface& dst,
-                                       const Common::Rectangle<u32>& src_rect,
-                                       const Common::Rectangle<u32>& dst_rect) {
+                                       const MathUtil::Rectangle<u32>& src_rect,
+                                       const MathUtil::Rectangle<u32>& dst_rect) {
         return false;
     }
 
@@ -102,8 +102,8 @@ struct FramebufferCacheKey {
 
 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
                                    ScreenInfo& info)
-    : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window},
-      screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
+    : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
+      buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
     // Create sampler objects
     for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
         texture_samplers[i].Create();
@@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
     }
 
     // Rebinding the VAO invalidates the vertex buffer bindings.
-    gpu.dirty_flags.vertex_array.set();
+    gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
 
     state.draw.vertex_array = vao_entry.handle;
     return vao_entry.handle;
@@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
     auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
     const auto& regs = gpu.regs;
 
-    if (gpu.dirty_flags.vertex_array.none())
+    if (!gpu.dirty_flags.vertex_array)
         return;
 
     MICROPROFILE_SCOPE(OpenGL_VB);
 
     // Upload all guest vertex arrays sequentially to our buffer
     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        if (!gpu.dirty_flags.vertex_array[index])
+        if (~gpu.dirty_flags.vertex_array & (1u << index))
             continue;
 
         const auto& vertex_array = regs.vertex_array[index];
@@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
         }
     }
 
-    gpu.dirty_flags.vertex_array.reset();
+    gpu.dirty_flags.vertex_array = 0;
 }
 
 DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
     OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
     std::optional<std::size_t> single_color_target) {
     MICROPROFILE_SCOPE(OpenGL_Framebuffer);
-    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
     const auto& regs = gpu.regs;
 
     const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
                                                  single_color_target};
-    if (fb_config_state == current_framebuffer_config_state &&
-        gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
+    if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 &&
+        !gpu.dirty_flags.zeta_buffer) {
         // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
         // single color targets). This is done because the guest registers may not change but the
         // host framebuffer may contain different attachments
@@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() {
     // Add space for at least 18 constant buffers
     buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
 
-    const bool invalidate = buffer_cache.Map(buffer_size);
+    bool invalidate = buffer_cache.Map(buffer_size);
     if (invalidate) {
         // As all cached buffers are invalidated, we need to recheck their state.
-        gpu.dirty_flags.vertex_array.set();
+        gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
     }
 
     const GLuint vao = SetupVertexFormat();
@@ -738,13 +738,9 @@ void RasterizerOpenGL::DrawArrays() {
     shader_program_manager->ApplyTo(state);
     state.Apply();
 
-    res_cache.SignalPreDrawCall();
-
     // Execute draw call
     params.DispatchDraw();
 
-    res_cache.SignalPostDrawCall();
-
     // Disable scissor test
     state.viewports[0].scissor.enabled = false;
 
@@ -783,8 +779,8 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 
 bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                              const Tegra::Engines::Fermi2D::Regs::Surface& dst,
-                                             const Common::Rectangle<u32>& src_rect,
-                                             const Common::Rectangle<u32>& dst_rect) {
+                                             const MathUtil::Rectangle<u32>& src_rect,
+                                             const MathUtil::Rectangle<u32>& dst_rect) {
     MICROPROFILE_SCOPE(OpenGL_Blits);
     res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
     return true;
@@ -1038,7 +1034,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
     for (std::size_t i = 0; i < viewport_count; i++) {
         auto& viewport = current_state.viewports[i];
         const auto& src = regs.viewports[i];
-        const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
+        const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
         viewport.x = viewport_rect.left;
         viewport.y = viewport_rect.bottom;
         viewport.width = viewport_rect.GetWidth();
@@ -62,8 +62,8 @@ public:
     void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                const Tegra::Engines::Fermi2D::Regs::Surface& dst,
-                               const Common::Rectangle<u32>& src_rect,
-                               const Common::Rectangle<u32>& dst_rect) override;
+                               const MathUtil::Rectangle<u32>& src_rect,
+                               const MathUtil::Rectangle<u32>& dst_rect) override;
     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                            u32 pixel_stride) override;
     bool AccelerateDrawBatch(bool is_indexed) override;
@@ -3,7 +3,6 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <optional>
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
@@ -400,7 +399,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
return format;
}

Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
if (IsPixelFormatASTC(pixel_format)) {
// ASTC formats must stop at the ASTC block size boundary

@@ -550,8 +549,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
// alternatives. This signals a bug on those functions.
const auto width = static_cast<GLsizei>(params.MipWidth(0));
const auto height = static_cast<GLsizei>(params.MipHeight(0));
memory_size = params.MemorySize();
reinterpreted = false;

const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
gl_internal_format = format_tuple.internal_format;
@@ -965,31 +962,30 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
const auto& regs{gpu.regs};

if (!gpu.dirty_flags.color_buffer[index]) {
return current_color_buffers[index];
if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) {
return last_color_buffers[index];
}
gpu.dirty_flags.color_buffer.reset(index);
gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index));

ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);

if (index >= regs.rt_control.count) {
return current_color_buffers[index] = {};
return last_color_buffers[index] = {};
}

if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
return current_color_buffers[index] = {};
return last_color_buffers[index] = {};
}

const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};

return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
return last_color_buffers[index] = GetSurface(color_params, preserve_contents);
}

void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
surface->LoadGLBuffer();
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->MarkAsModified(false, *this);
surface->MarkForReload(false);
}

Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -1001,23 +997,18 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
Surface surface{TryGet(params.addr)};
if (surface) {
if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
// Use the cached surface as-is unless it's not synced with memory
if (surface->MustReload())
LoadSurface(surface);
// Use the cached surface as-is
return surface;
} else if (preserve_contents) {
// If surface parameters changed and we care about keeping the previous data, recreate
// the surface from the old one
Surface new_surface{RecreateSurface(surface, params)};
UnregisterSurface(surface);
Unregister(surface);
Register(new_surface);
if (new_surface->IsUploaded()) {
RegisterReinterpretSurface(new_surface);
}
return new_surface;
} else {
// Delete the old surface before creating a new one to prevent collisions.
UnregisterSurface(surface);
Unregister(surface);
}
}
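The control flow above boils down to a three-way decision. A compact sketch of just that decision; Action and Classify are illustrative names, not part of the codebase:

enum class Action {
    Reuse,    // parameters match: hand back the cached surface
    Recreate, // parameters changed but old contents must be carried over
    Replace,  // parameters changed and contents are disposable
};

Action Classify(bool compatible, bool preserve_contents) {
    if (compatible) {
        return Action::Reuse;
    }
    return preserve_contents ? Action::Recreate : Action::Replace;
}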
@@ -1071,8 +1062,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
}

static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
std::size_t cubemap_face = 0) {

@@ -1202,7 +1193,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
void RasterizerCacheOpenGL::FermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) {

const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
@@ -1299,107 +1290,4 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
return {};
}

static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
u32 height) {
for (u32 i = 0; i < params.max_mip_level; i++) {
if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
return {i};
}
}
return {};
}

static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
const std::size_t size = params.LayerMemorySize();
VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
for (u32 i = 0; i < params.depth; i++) {
if (start == addr) {
return {i};
}
start += size;
}
return {};
}

static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
const Surface blitted_surface) {
const auto& dst_params = blitted_surface->GetSurfaceParams();
const auto& src_params = render_surface->GetSurfaceParams();
const std::size_t src_memory_size = src_params.size_in_bytes;
const std::optional<u32> level =
TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
if (level.has_value()) {
if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
src_params.height == dst_params.MipHeight(*level) &&
src_params.block_height >= dst_params.MipBlockHeight(*level)) {
const std::optional<u32> slot =
TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
if (slot.has_value()) {
glCopyImageSubData(render_surface->Texture().handle,
SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
blitted_surface->Texture().handle,
SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
blitted_surface->MarkAsModified(true, cache);
return true;
}
}
}
return false;
}

static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
if (bound2 > bound1)
return true;
const auto& dst_params = blitted_surface->GetSurfaceParams();
const auto& src_params = render_surface->GetSurfaceParams();
return (dst_params.component_type != src_params.component_type);
}

static bool IsReinterpretInvalidSecond(const Surface render_surface,
const Surface blitted_surface) {
const auto& dst_params = blitted_surface->GetSurfaceParams();
const auto& src_params = render_surface->GetSurfaceParams();
return (dst_params.height > src_params.height && dst_params.width > src_params.width);
}

bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
Surface intersect) {
if (IsReinterpretInvalid(triggering_surface, intersect)) {
UnregisterSurface(intersect);
return false;
}
if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
UnregisterSurface(intersect);
return false;
}
FlushObject(intersect);
FlushObject(triggering_surface);
intersect->MarkForReload(true);
}
return true;
}

void RasterizerCacheOpenGL::SignalPreDrawCall() {
if (texception && GLAD_GL_ARB_texture_barrier) {
glTextureBarrier();
}
texception = false;
}

void RasterizerCacheOpenGL::SignalPostDrawCall() {
for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
if (current_color_buffers[i] != nullptr) {
Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
if (intersect != nullptr) {
PartialReinterpretSurface(current_color_buffers[i], intersect);
texception = true;
}
}
}
}

} // namespace OpenGL
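TryFindBestMipMap above keys its search on two properties of the candidate: its byte size and its height must both match one mip level exactly. A standalone sketch of that search loop, with a stand-in MipInfo type in place of SurfaceParams:

#include <cstddef>
#include <optional>
#include <vector>

struct MipInfo {
    std::size_t bytes; // stand-in for SurfaceParams::GetMipmapSingleSize(i)
    unsigned height;   // stand-in for SurfaceParams::MipHeight(i)
};

std::optional<unsigned> FindMatchingMip(const std::vector<MipInfo>& mips,
                                        std::size_t bytes, unsigned height) {
    for (unsigned level = 0; level < mips.size(); ++level) {
        if (mips[level].bytes == bytes && mips[level].height == height) {
            return level;
        }
    }
    return std::nullopt;
}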
@@ -28,13 +28,12 @@ namespace OpenGL {

class CachedSurface;
using Surface = std::shared_ptr<CachedSurface>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;

using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
using SurfaceType = VideoCore::Surface::SurfaceType;
using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;

struct SurfaceParams {
enum class SurfaceClass {

@@ -72,7 +71,7 @@ struct SurfaceParams {
}

/// Returns the rectangle corresponding to this surface
Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const;

/// Returns the total size of this surface in bytes, adjusted for compression
std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -141,18 +140,10 @@ struct SurfaceParams {
return offset;
}

std::size_t GetMipmapSingleSize(u32 mip_level) const {
return InnerMipmapMemorySize(mip_level, false, is_layered);
}

u32 MipWidth(u32 mip_level) const {
return std::max(1U, width >> mip_level);
}

u32 MipWidthGobAligned(u32 mip_level) const {
return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
}

u32 MipHeight(u32 mip_level) const {
return std::max(1U, height >> mip_level);
}

@@ -355,10 +346,6 @@ public:
return cached_size_in_bytes;
}

std::size_t GetMemorySize() const {
return memory_size;
}

void Flush() override {
FlushGLBuffer();
}
@@ -408,26 +395,6 @@ public:
Tegra::Texture::SwizzleSource swizzle_z,
Tegra::Texture::SwizzleSource swizzle_w);

void MarkReinterpreted() {
reinterpreted = true;
}

bool IsReinterpreted() const {
return reinterpreted;
}

void MarkForReload(bool reload) {
must_reload = reload;
}

bool MustReload() const {
return must_reload;
}

bool IsUploaded() const {
return params.identity == SurfaceParams::SurfaceClass::Uploaded;
}

private:
void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);

@@ -441,9 +408,6 @@ private:
GLenum gl_internal_format{};
std::size_t cached_size_in_bytes{};
std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
std::size_t memory_size;
bool reinterpreted = false;
bool must_reload = false;
};

class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -466,11 +430,8 @@ public:
/// Copies the contents of one surface to another
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect);

void SignalPreDrawCall();
void SignalPostDrawCall();
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect);

private:
void LoadSurface(const Surface& surface);

@@ -488,10 +449,6 @@ private:
/// Tries to get a reserved surface for the specified parameters
Surface TryGetReservedSurface(const SurfaceParams& params);

// Partially reinterpret a surface based on a triggering_surface that collides with it.
// Returns true if the reinterpret was successful, false in case it was not.
bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);

/// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
@@ -508,50 +465,12 @@ private:
OGLFramebuffer read_framebuffer;
OGLFramebuffer draw_framebuffer;

bool texception = false;

/// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
/// using the new format.
OGLBuffer copy_pbo;

std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
Surface last_depth_buffer;

using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;

static auto GetReinterpretInterval(const Surface& object) {
return SurfaceInterval::right_open(object->GetAddr() + 1,
object->GetAddr() + object->GetMemorySize() - 1);
}

// Reinterpreted surfaces are very fragile, as the game may keep rendering into them.
SurfaceIntervalCache reinterpreted_surfaces;

void RegisterReinterpretSurface(Surface reinterpret_surface) {
auto interval = GetReinterpretInterval(reinterpret_surface);
reinterpreted_surfaces.insert({interval, reinterpret_surface});
reinterpret_surface->MarkReinterpreted();
}

Surface CollideOnReinterpretedSurface(VAddr addr) const {
const SurfaceInterval interval{addr};
for (auto& pair :
boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
return pair.second;
}
return nullptr;
}

/// Unregisters an object from the cache
void UnregisterSurface(const Surface& object) {
if (object->IsReinterpreted()) {
auto interval = GetReinterpretInterval(object);
reinterpreted_surfaces.erase(interval);
}
Unregister(object);
}
};

} // namespace OpenGL
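The removed members above track reinterpreted surfaces in a boost::icl::interval_map keyed by guest address ranges. The same point query can be sketched with a plain std::map of non-overlapping ranges; this illustrates the lookup only, not the removed implementation:

#include <cstdint>
#include <map>
#include <utility>

using VAddr = std::uint64_t;
struct Surface; // opaque stand-in

// start address -> (end address, surface); ranges assumed non-overlapping
std::map<VAddr, std::pair<VAddr, Surface*>> reinterpreted_ranges;

Surface* CollideOnReinterpreted(VAddr addr) {
    auto it = reinterpreted_ranges.upper_bound(addr);
    if (it == reinterpreted_ranges.begin()) {
        return nullptr;
    }
    --it; // candidate range starting at or before addr
    return addr < it->second.first ? it->second.second : nullptr;
}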
@@ -5,7 +5,9 @@
#include <array>
#include <string>
#include <string_view>
#include <utility>
#include <variant>
#include <vector>

#include <fmt/format.h>

@@ -717,7 +719,7 @@ private:
}

std::string GenerateTexture(Operation operation, const std::string& func,
bool is_extra_int = false) {
const std::vector<std::pair<Type, Node>>& extras) {
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};

const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());

@@ -738,36 +740,47 @@ private:
expr += Visit(operation[i]);

const std::size_t next = i + 1;
if (next < count || has_array || has_shadow)
if (next < count)
expr += ", ";
}
if (has_array) {
expr += "float(ftoi(" + Visit(meta->array) + "))";
expr += ", float(ftoi(" + Visit(meta->array) + "))";
}
if (has_shadow) {
if (has_array)
expr += ", ";
expr += Visit(meta->depth_compare);
expr += ", " + Visit(meta->depth_compare);
}
expr += ')';

for (const Node extra : meta->extras) {
for (const auto& extra_pair : extras) {
const auto [type, operand] = extra_pair;
if (operand == nullptr) {
continue;
}
expr += ", ";
if (is_extra_int) {
if (const auto immediate = std::get_if<ImmediateNode>(extra)) {

switch (type) {
case Type::Int:
if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
// Inline the string as an immediate integer in GLSL (some extra arguments are
// required to be constant)
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
expr += "ftoi(" + Visit(extra) + ')';
expr += "ftoi(" + Visit(operand) + ')';
}
} else {
expr += Visit(extra);
break;
case Type::Float:
expr += Visit(operand);
break;
default: {
const auto type_int = static_cast<u32>(type);
UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
expr += '0';
break;
}
}
}

expr += ')';
return expr;
return expr + ')';
}

std::string Assign(Operation operation) {
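The refactor above replaces the single is_extra_int flag with a list of (Type, Node) pairs, so each optional GLSL argument (bias, lod, gather component) declares how it must be emitted. A reduced sketch of that per-argument dispatch, using pre-visited strings in place of IR nodes:

#include <string>
#include <utility>
#include <vector>

enum class Type { Float, Int };

// expr arrives as an open call such as "texture(sampler0, coords" and is
// closed here, mirroring the "return expr + ')'" change above.
std::string AppendExtras(std::string expr,
                         const std::vector<std::pair<Type, std::string>>& extras) {
    for (const auto& [type, operand] : extras) {
        if (operand.empty()) {
            continue; // slot unused for this instruction (e.g. no bias)
        }
        expr += ", ";
        expr += type == Type::Int ? "ftoi(" + operand + ')' : operand;
    }
    return expr + ')';
}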
@@ -1146,7 +1159,7 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);

std::string expr = GenerateTexture(operation, "texture");
std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}

@@ -1157,7 +1170,7 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);

std::string expr = GenerateTexture(operation, "textureLod");
std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}

@@ -1168,7 +1181,8 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);

return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) +
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
GetSwizzle(meta->element);
}

@@ -1197,8 +1211,8 @@ private:
ASSERT(meta);

if (meta->element < 2) {
return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" +
GetSwizzle(meta->element) + "))";
return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
" * vec2(256))" + GetSwizzle(meta->element) + "))";
}
return "0";
}

@@ -1224,9 +1238,9 @@ private:
else if (next < count)
expr += ", ";
}
for (std::size_t i = 0; i < meta->extras.size(); ++i) {
if (meta->lod) {
expr += ", ";
expr += CastOperand(Visit(meta->extras.at(i)), Type::Int);
expr += CastOperand(Visit(meta->lod), Type::Int);
}
expr += ')';
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <cstring>
#include <fmt/format.h>
#include <lz4.h>

@@ -257,7 +257,6 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer) {
texture.width = framebuffer.width;
texture.height = framebuffer.height;
texture.pixel_format = framebuffer.pixel_format;

GLint internal_format;
switch (framebuffer.pixel_format) {

@@ -39,7 +39,7 @@ struct TextureInfo {
/// Structure used for storing information about the display target for the Switch screen
struct ScreenInfo {
GLuint display_texture;
const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
TextureInfo texture;
};

@@ -102,7 +102,7 @@ private:

/// Used for transforming the framebuffer orientation
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
Common::Rectangle<int> framebuffer_crop_rect;
MathUtil::Rectangle<int> framebuffer_crop_rect;
};

} // namespace OpenGL
@@ -1,116 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <cstring>
#include <memory>
#include <optional>
#include <tuple>

#include "common/alignment.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"

namespace Vulkan {

VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
: RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} {
const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eIndexBuffer |
vk::BufferUsageFlagBits::eUniformBuffer;
const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
vk::AccessFlagBits::eUniformRead;
stream_buffer =
std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
vk::PipelineStageFlagBits::eAllCommands);
buffer_handle = stream_buffer->GetBuffer();
}

VKBufferCache::~VKBufferCache() = default;

u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
bool cache) {
const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
ASSERT(cpu_addr);

// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
cache &= size >= 2048;

if (cache) {
if (auto entry = TryGet(*cpu_addr); entry) {
if (entry->size >= size && entry->alignment == alignment) {
return entry->offset;
}
Unregister(entry);
}
}

AlignBuffer(alignment);
const u64 uploaded_offset = buffer_offset;

Memory::ReadBlock(*cpu_addr, buffer_ptr, size);

buffer_ptr += size;
buffer_offset += size;

if (cache) {
auto entry = std::make_shared<CachedBufferEntry>();
entry->offset = uploaded_offset;
entry->size = size;
entry->alignment = alignment;
entry->addr = *cpu_addr;
Register(entry);
}

return uploaded_offset;
}

u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
AlignBuffer(alignment);
std::memcpy(buffer_ptr, raw_pointer, size);
const u64 uploaded_offset = buffer_offset;

buffer_ptr += size;
buffer_offset += size;
return uploaded_offset;
}

std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
AlignBuffer(alignment);
u8* const uploaded_ptr = buffer_ptr;
const u64 uploaded_offset = buffer_offset;

buffer_ptr += size;
buffer_offset += size;
return {uploaded_ptr, uploaded_offset};
}

void VKBufferCache::Reserve(std::size_t max_size) {
bool invalidate;
std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
buffer_offset = buffer_offset_base;

if (invalidate) {
InvalidateAll();
}
}

VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) {
return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base);
}

void VKBufferCache::AlignBuffer(std::size_t alignment) {
// Align the offset, not the mapped pointer
const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
buffer_ptr += offset_aligned - buffer_offset;
buffer_offset = offset_aligned;
}

} // namespace Vulkan
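AlignBuffer above is the subtle part of this removed file: alignment is applied to the linear offset, and the mapped pointer is advanced by the same delta so the two never drift apart. A self-contained version of that arithmetic, under assumed names (StreamCursor is not yuzu's type):

#include <cstdint>

struct StreamCursor {
    std::uint8_t* ptr = nullptr; // host-visible mapped pointer
    std::uint64_t offset = 0;    // offset within the device buffer

    static std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
        return (value + align - 1) / align * align; // align assumed non-zero
    }

    void Align(std::uint64_t alignment) {
        const std::uint64_t aligned = AlignUp(offset, alignment);
        ptr += aligned - offset; // keep pointer and offset in lockstep
        offset = aligned;
    }
};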
@@ -1,87 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <memory>
#include <tuple>

#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"

namespace Tegra {
class MemoryManager;
}

namespace Vulkan {

class VKDevice;
class VKFence;
class VKMemoryManager;
class VKStreamBuffer;

struct CachedBufferEntry final : public RasterizerCacheObject {
VAddr GetAddr() const override {
return addr;
}

std::size_t GetSizeInBytes() const override {
return size;
}

// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}

VAddr addr;
std::size_t size;
u64 offset;
std::size_t alignment;
};

class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
public:
explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, u64 size);
~VKBufferCache();

/// Uploads data from a guest GPU address. Returns the host buffer offset where it's been
/// allocated.
u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
bool cache = true);

/// Uploads from host memory. Returns the host buffer offset where it's been allocated.
u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);

/// Reserves memory to be used by the host CPU. Returns mapped address and offset.
std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);

/// Reserves a region of memory to be used in subsequent upload/reserve operations.
void Reserve(std::size_t max_size);

/// Ensures that the set data is sent to the device.
[[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx);

/// Returns the buffer cache handle.
vk::Buffer GetBuffer() const {
return buffer_handle;
}

private:
void AlignBuffer(std::size_t alignment);

Tegra::MemoryManager& tegra_memory_manager;

std::unique_ptr<VKStreamBuffer> stream_buffer;
vk::Buffer buffer_handle;

u8* buffer_ptr = nullptr;
u64 buffer_offset = 0;
u64 buffer_offset_base = 0;
};

} // namespace Vulkan
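The reuse test in UploadMemory (see the .cpp above) accepts a cached entry only when it is at least as large as the request and identically aligned; anything else is unregistered and re-uploaded. A sketch of just that predicate, with a stand-in Entry type:

#include <cstddef>
#include <cstdint>

struct Entry {
    std::size_t size;
    std::uint64_t alignment;
    std::uint64_t offset;
};

// Returns true and yields the cached offset when the entry can be reused.
bool TryReuse(const Entry* entry, std::size_t size, std::uint64_t alignment,
              std::uint64_t& out_offset) {
    if (entry != nullptr && entry->size >= size && entry->alignment == alignment) {
        out_offset = entry->offset;
        return true;
    }
    return false;
}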
@@ -238,7 +238,7 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32

VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
u8* data, u64 begin, u64 end)
: interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
: allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {}

VKMemoryCommitImpl::~VKMemoryCommitImpl() {
allocation->Free(this);
@@ -125,12 +125,11 @@ void VKFence::Protect(VKResource* resource) {
protected_resources.push_back(resource);
}

void VKFence::Unprotect(VKResource* resource) {
void VKFence::Unprotect(const VKResource* resource) {
const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
ASSERT(it != protected_resources.end());

resource->OnFenceRemoval(this);
protected_resources.erase(it);
if (it != protected_resources.end()) {
protected_resources.erase(it);
}
}

VKFenceWatch::VKFenceWatch() = default;

@@ -142,11 +141,12 @@ VKFenceWatch::~VKFenceWatch() {
}

void VKFenceWatch::Wait() {
if (fence == nullptr) {
if (!fence) {
return;
}
fence->Wait();
fence->Unprotect(this);
fence = nullptr;
}

void VKFenceWatch::Watch(VKFence& new_fence) {
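The Unprotect change above turns a hard assertion into a guarded erase, so releasing a resource the fence no longer tracks becomes a no-op. The generic pattern, separated from the Vulkan types:

#include <algorithm>
#include <vector>

// Erase item from list if present; silently do nothing otherwise.
template <typename T>
void EraseIfPresent(std::vector<T*>& list, const T* item) {
    const auto it = std::find(list.begin(), list.end(), item);
    if (it != list.end()) {
        list.erase(it);
    }
}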
@@ -63,7 +63,7 @@ public:
void Protect(VKResource* resource);

/// Removes protection for a resource.
void Unprotect(VKResource* resource);
void Unprotect(const VKResource* resource);

/// Retrieves the fence.
operator vk::Fence() const {
@@ -1,90 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <memory>
#include <optional>
#include <vector>

#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"

namespace Vulkan {

constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;

VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
: device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
pipeline_stage} {
CreateBuffers(memory_manager, usage);
ReserveWatches(WATCHES_INITIAL_RESERVE);
}

VKStreamBuffer::~VKStreamBuffer() = default;

std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
ASSERT(size <= buffer_size);
mapped_size = size;

if (offset + size > buffer_size) {
// The buffer would overflow; save the number of used watches, signal an invalidation and
// reset the state.
invalidation_mark = used_watches;
used_watches = 0;
offset = 0;
}

return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
}

VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");

if (invalidation_mark) {
// TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
exctx = scheduler.Flush();
std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
[&](auto& resource) { resource->Wait(); });
invalidation_mark = std::nullopt;
}

if (used_watches + 1 >= watches.size()) {
// Ensure that there are enough watches.
ReserveWatches(WATCHES_RESERVE_CHUNK);
}
// Add a watch for this allocation.
watches[used_watches++]->Watch(exctx.GetFence());

offset += size;

return exctx;
}

void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
nullptr);

const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
commit = memory_manager.Commit(*buffer, true);
mapped_pointer = commit->GetData();
}

void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
const std::size_t previous_size = watches.size();
watches.resize(previous_size + grow_size);
std::generate(watches.begin() + previous_size, watches.end(),
[]() { return std::make_unique<VKFenceWatch>(); });
}

} // namespace Vulkan
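Reserve above implements a one-directional ring: when the requested size would run past the end, the cursor wraps to zero and an invalidation is flagged so previously handed-out regions are treated as stale. A simplified model of just that policy (the real code advances the cursor later, in Send):

#include <cstdint>

struct Reservation {
    std::uint64_t offset;
    bool invalidated; // true when the wrap made older regions stale
};

Reservation ReserveRegion(std::uint64_t& cursor, std::uint64_t size,
                          std::uint64_t capacity) {
    bool invalidated = false;
    if (cursor + size > capacity) {
        cursor = 0; // wrap around; everything handed out before is invalid
        invalidated = true;
    }
    return {cursor, invalidated};
}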
@@ -1,72 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <memory>
#include <optional>
#include <tuple>
#include <vector>

#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"

namespace Vulkan {

class VKDevice;
class VKFence;
class VKFenceWatch;
class VKResourceManager;
class VKScheduler;

class VKStreamBuffer {
public:
explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
~VKStreamBuffer();

/**
 * Reserves a region of memory from the stream buffer.
 * @param size Size to reserve.
 * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
 * offset and a boolean that's true when the buffer has been invalidated.
 */
std::tuple<u8*, u64, bool> Reserve(u64 size);

/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
[[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);

vk::Buffer GetBuffer() const {
return *buffer;
}

private:
/// Creates Vulkan buffer handles, committing the required memory.
void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);

/// Increases the number of watches available.
void ReserveWatches(std::size_t grow_size);

const VKDevice& device; ///< Vulkan device manager.
VKScheduler& scheduler; ///< Command scheduler.
const u64 buffer_size; ///< Total size of the stream buffer.
const vk::AccessFlags access; ///< Access usage of this stream buffer.
const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.

UniqueBuffer buffer; ///< Mapped buffer.
VKMemoryCommit commit; ///< Memory commit.
u8* mapped_pointer{}; ///< Pointer to the host visible commit

u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.

std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
std::optional<std::size_t>
invalidation_mark{}; ///< Number of watches used in the current invalidation.
};

} // namespace Vulkan
@@ -165,6 +165,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
{OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
{OpCode::Type::Texture, &ShaderIR::DecodeTexture},
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
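The hunk above wires the new Texture decoder into DecodeInstr's table of opcode-type handlers. The pattern in isolation, pointer-to-member dispatch keyed by instruction family (all names here are illustrative, not yuzu's):

#include <unordered_map>

class Decoder {
public:
    enum class Family { Memory, Texture };

    unsigned Decode(Family family, unsigned pc) {
        static const std::unordered_map<Family, unsigned (Decoder::*)(unsigned)>
            table{{Family::Memory, &Decoder::DecodeMemory},
                  {Family::Texture, &Decoder::DecodeTexture}};
        const auto it = table.find(family);
        return it != table.end() ? (this->*it->second)(pc) : pc + 1;
    }

private:
    unsigned DecodeMemory(unsigned pc) { return pc + 1; }  // placeholder body
    unsigned DecodeTexture(unsigned pc) { return pc + 1; } // placeholder body
};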
@@ -17,24 +17,6 @@ using Tegra::Shader::Attribute;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
using Tegra::Shader::TextureMiscMode;
using Tegra::Shader::TextureProcessMode;
using Tegra::Shader::TextureType;

static std::size_t GetCoordCount(TextureType texture_type) {
switch (texture_type) {
case TextureType::Texture1D:
return 1;
case TextureType::Texture2D:
return 2;
case TextureType::Texture3D:
case TextureType::TextureCube:
return 3;
default:
UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
return 0;
}
}

u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};

@@ -247,194 +229,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
case OpCode::Id::TEX: {
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");

if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
}

const TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.tex.GetTextureProcessMode();
WriteTexInstructionFloat(
bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
break;
}
case OpCode::Id::TEXS: {
const TextureType texture_type{instr.texs.GetTextureType()};
const bool is_array{instr.texs.IsArrayTexture()};
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.texs.GetTextureProcessMode();

if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
}

const Node4 components =
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);

if (instr.texs.fp32_flag) {
WriteTexsInstructionFloat(bb, instr, components);
} else {
WriteTexsInstructionHalfFloat(bb, instr, components);
}
break;
}
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.array == 0);
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
"NDV is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
"PTP is not implemented");

if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
}

const auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
const bool is_array = instr.tld4.array != 0;
WriteTexInstructionFloat(bb, instr,
GetTld4Code(instr, texture_type, depth_compare, is_array));
break;
}
case OpCode::Id::TLD4S: {
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
}

const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = GetRegister(instr.gpr20);

// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
std::vector<Node> coords;
if (depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
coords.push_back(op_a);
coords.push_back(op_y);
coords.push_back(op_b);
} else {
coords.push_back(op_a);
coords.push_back(op_b);
}
std::vector<Node> extras;
extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));

const auto& sampler =
GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);

Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, {}, {}, extras, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}

WriteTexsInstructionFloat(bb, instr, values);
break;
}
case OpCode::Id::TXQ: {
if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
}

// TODO: The new commits on the texture refactor change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const auto& sampler =
GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);

u32 indexer = 0;
switch (instr.txq.query_type) {
case Tegra::Shader::TextureQueryType::Dimension: {
for (u32 element = 0; element < 4; ++element) {
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
MetaTexture meta{sampler, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
SetTemporal(bb, indexer++, value);
}
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
static_cast<u32>(instr.txq.query_type.Value()));
}
break;
}
case OpCode::Id::TMML: {
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
"NDV is not implemented");

if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
}

auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

std::vector<Node> coords;

// TODO: Add coordinates for different samplers once other texture types are implemented.
switch (texture_type) {
case TextureType::Texture1D:
coords.push_back(GetRegister(instr.gpr8));
break;
case TextureType::Texture2D:
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
break;
default:
UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));

// Fallback to interpreting as a 2D texture for now
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
texture_type = TextureType::Texture2D;
}

for (u32 element = 0; element < 2; ++element) {
auto params = coords;
MetaTexture meta{sampler, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporal(bb, element, value);
}
for (u32 element = 0; element < 2; ++element) {
SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
}

break;
}
case OpCode::Id::TLDS: {
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};

UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");

if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
}

WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
}

@@ -442,291 +236,4 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
return pc;
}

const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
bool is_array, bool is_shadow) {
const auto offset = static_cast<std::size_t>(sampler.index.Value());

// If this sampler has already been used, return the existing mapping.
const auto itr =
std::find_if(used_samplers.begin(), used_samplers.end(),
[&](const Sampler& entry) { return entry.GetOffset() == offset; });
if (itr != used_samplers.end()) {
ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
itr->IsShadow() == is_shadow);
return *itr;
}

// Otherwise create a new mapping for this sampler
const std::size_t next_index = used_samplers.size();
const Sampler entry{offset, next_index, type, is_array, is_shadow};
return *used_samplers.emplace(entry).first;
}
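GetSampler above is a find-or-create over used_samplers, asserting that repeated uses of the same offset agree on type, array, and shadow. An illustrative reduction using a vector instead of the set the real code keeps; SamplerEntry and FindOrCreateSampler are stand-in names:

#include <algorithm>
#include <cstddef>
#include <vector>

struct SamplerEntry {
    std::size_t offset; // binding offset from the instruction
    std::size_t index;  // dense index assigned on first use
};

const SamplerEntry& FindOrCreateSampler(std::vector<SamplerEntry>& samplers,
                                        std::size_t offset) {
    const auto it = std::find_if(samplers.begin(), samplers.end(),
                                 [&](const SamplerEntry& e) { return e.offset == offset; });
    if (it != samplers.end()) {
        return *it; // already mapped: reuse the existing entry
    }
    samplers.push_back({offset, samplers.size()});
    return samplers.back();
}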
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
u32 dest_elem = 0;
for (u32 elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
}
SetTemporal(bb, dest_elem++, components[elem]);
}
// After writing values in temporals, move them to the real registers
for (u32 i = 0; i < dest_elem; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
}

void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
const Node4& components) {
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1

u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component))
continue;
SetTemporal(bb, dest_elem++, components[component]);
}

for (u32 i = 0; i < dest_elem; ++i) {
if (i < 2) {
// Write the first two swizzle components to gpr0 and gpr0+1
SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
} else {
ASSERT(instr.texs.HasTwoDestinations());
// Write the rest of the swizzle components to gpr28 and gpr28+1
SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
}
}
}

void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
const Node4& components) {
// TEXS.F16 destination registers are packed in two registers in pairs (just like any half
// float instruction).

Node4 values;
u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component))
continue;
values[dest_elem++] = components[component];
}
if (dest_elem == 0)
return;

std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });

const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
if (dest_elem <= 2) {
SetRegister(bb, instr.gpr0, first_value);
return;
}

SetTemporal(bb, 0, first_value);
SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));

SetRegister(bb, instr.gpr0, GetTemporal(0));
SetRegister(bb, instr.gpr28, GetTemporal(1));
}
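WriteTexsInstructionHalfFloat packs the surviving components two at a time: HPack2 joins a pair of half floats into one 32-bit register (the first pair goes to gpr0, the second to gpr28). The bit-level operation it models, shown on raw 16-bit payloads:

#include <cstdint>

// Pack two 16-bit half-float payloads into one 32-bit register image.
std::uint32_t HPack2(std::uint16_t low, std::uint16_t high) {
    return static_cast<std::uint32_t>(low) |
           (static_cast<std::uint32_t>(high) << 16);
}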
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, std::vector<Node> coords,
|
||||
Node array, Node depth_compare, u32 bias_offset) {
|
||||
const bool is_array = array;
|
||||
const bool is_shadow = depth_compare;
|
||||
|
||||
UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
|
||||
(texture_type == TextureType::TextureCube && is_array && is_shadow),
|
||||
"This method is not supported.");
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
|
||||
|
||||
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
|
||||
process_mode == TextureProcessMode::LL ||
|
||||
process_mode == TextureProcessMode::LLA;
|
||||
|
||||
// LOD selection (either via bias or explicit textureLod) not supported in GL for
|
||||
// sampler2DArrayShadow and samplerCubeArrayShadow.
|
||||
const bool gl_lod_supported =
|
||||
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
|
||||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
|
||||
|
||||
const OperationCode read_method =
|
||||
lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;
|
||||
|
||||
UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
|
||||
|
||||
std::vector<Node> extras;
|
||||
if (process_mode != TextureProcessMode::None && gl_lod_supported) {
|
||||
if (process_mode == TextureProcessMode::LZ) {
|
||||
extras.push_back(Immediate(0.0f));
|
||||
} else {
|
||||
// If present, lod or bias are always stored in the register indexed by the gpr20
|
||||
// field with an offset depending on the usage of the other registers
|
||||
extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
|
||||
}
|
||||
}
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto copy_coords = coords;
|
||||
MetaTexture meta{sampler, array, depth_compare, extras, element};
|
||||
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
||||
}
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
|
||||
const bool lod_bias_enabled =
|
||||
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
|
||||
|
||||
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
||||
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
|
||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
coords.push_back(GetRegister(coord_register + i));
|
||||
}
    // For 1D.DC, OpenGL ignores the 2nd component.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        coords.push_back(Immediate(0.0f));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;

    Node dc{};
    if (depth_compare) {
        // Depth is always stored in the register indicated by gpr20, or in the next register
        // when lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        dc = GetRegister(depth_register);
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
}

Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
    // If arrays are enabled, the index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // The first coordinate index is stored in gpr8, or in gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);
    const u64 last_coord_register =
        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;
    const u32 bias_offset = coord_count > 2 ? 1 : 0;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        const bool last = (i == (coord_count - 1)) && (coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;

    Node dc{};
    if (depth_compare) {
        // Depth is always stored in the register indicated by gpr20, or in the next register
        // when lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        dc = GetRegister(depth_register);
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
}

Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                            bool is_array) {
    const std::size_t coord_count = GetCoordCount(texture_type);
    const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
    const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);

    // If arrays are enabled, the index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // The first coordinate index is gpr8, or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
    }

    return values;
}

Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
    const std::size_t type_coord_count = GetCoordCount(texture_type);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;

    // If arrays are enabled, the index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // If this is an array texture, the coordinates start at gpr20
    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();

    const u64 last_coord_register =
        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < type_coord_count; ++i) {
        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;
    // When lod is used, it is always stored in gpr20
    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        MetaTexture meta{sampler, array, {}, {lod}, element};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }
    return values;
}

std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
    std::size_t max_coords, std::size_t max_inputs) {
    const std::size_t coord_count = GetCoordCount(texture_type);

    std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
    const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
    if (total_coord_count > max_coords || total_reg_count > max_inputs) {
        UNIMPLEMENTED_MSG("Unsupported Texture operation");
        total_coord_count = std::min(total_coord_count, max_coords);
    }
    // For 1D.DC, OpenGL uses a vec3, but the 2nd component is ignored later.
    total_coord_count +=
        (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;

    return {coord_count, total_coord_count};
}

} // namespace VideoCommon::Shader
534 src/video_core/shader/decode/texture.cpp Normal file
@@ -0,0 +1,534 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <vector>
#include <fmt/format.h>

#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"

namespace VideoCommon::Shader {

using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
using Tegra::Shader::TextureMiscMode;
using Tegra::Shader::TextureProcessMode;
using Tegra::Shader::TextureType;

static std::size_t GetCoordCount(TextureType texture_type) {
    switch (texture_type) {
    case TextureType::Texture1D:
        return 1;
    case TextureType::Texture2D:
        return 2;
    case TextureType::Texture3D:
    case TextureType::TextureCube:
        return 3;
    default:
        UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
        return 0;
    }
}

u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::TEX: {
        UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");

        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
        }

        const TextureType texture_type{instr.tex.texture_type};
        const bool is_array = instr.tex.array != 0;
        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
        const auto process_mode = instr.tex.GetTextureProcessMode();
        WriteTexInstructionFloat(
            bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
        break;
    }
    case OpCode::Id::TEXS: {
        const TextureType texture_type{instr.texs.GetTextureType()};
        const bool is_array{instr.texs.IsArrayTexture()};
        const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
        const auto process_mode = instr.texs.GetTextureProcessMode();

        if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
        }

        const Node4 components =
            GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);

        if (instr.texs.fp32_flag) {
            WriteTexsInstructionFloat(bb, instr, components);
        } else {
            WriteTexsInstructionHalfFloat(bb, instr, components);
        }
        break;
    }
    case OpCode::Id::TLD4: {
        ASSERT(instr.tld4.array == 0);
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
                             "NDV is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
                             "PTP is not implemented");

        if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
        }

        const auto texture_type = instr.tld4.texture_type.Value();
        const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
        const bool is_array = instr.tld4.array != 0;
        WriteTexInstructionFloat(bb, instr,
                                 GetTld4Code(instr, texture_type, depth_compare, is_array));
        break;
    }
    case OpCode::Id::TLD4S: {
        UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");
        if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
        }

        const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
        const Node op_a = GetRegister(instr.gpr8);
        const Node op_b = GetRegister(instr.gpr20);

        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
        std::vector<Node> coords;
        if (depth_compare) {
            // Note: TLD4S coordinate encoding works just like TEXS's
            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
            coords.push_back(op_a);
            coords.push_back(op_y);
            coords.push_back(op_b);
        } else {
            coords.push_back(op_a);
            coords.push_back(op_b);
        }
        const Node component = Immediate(static_cast<u32>(instr.tld4s.component));

        const auto& sampler =
            GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);

        Node4 values;
        for (u32 element = 0; element < values.size(); ++element) {
            auto coords_copy = coords;
            MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
            values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
        }

        WriteTexsInstructionFloat(bb, instr, values);
        break;
    }
    case OpCode::Id::TXQ: {
        if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
        }

        // TODO: The new commits on the texture refactor change the way samplers work.
        // Sadly, not all texture instructions specify the type of texture their sampler
        // uses. This must be fixed at a later stage.
        const auto& sampler =
            GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);

        u32 indexer = 0;
        switch (instr.txq.query_type) {
        case Tegra::Shader::TextureQueryType::Dimension: {
            for (u32 element = 0; element < 4; ++element) {
                if (!instr.txq.IsComponentEnabled(element)) {
                    continue;
                }
                MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
                const Node value =
                    Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
                SetTemporal(bb, indexer++, value);
            }
            for (u32 i = 0; i < indexer; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
            }
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
                              static_cast<u32>(instr.txq.query_type.Value()));
        }
        break;
    }
    case OpCode::Id::TMML: {
        UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
                             "NDV is not implemented");

        if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
        }

        auto texture_type = instr.tmml.texture_type.Value();
        const bool is_array = instr.tmml.array != 0;
        const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

        std::vector<Node> coords;

        // TODO: Add coordinates for different samplers once other texture types are implemented.
        switch (texture_type) {
        case TextureType::Texture1D:
            coords.push_back(GetRegister(instr.gpr8));
            break;
        case TextureType::Texture2D:
            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
            break;
        default:
            UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));

            // Fall back to interpreting the texture as 2D for now
            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
            texture_type = TextureType::Texture2D;
        }

        for (u32 element = 0; element < 2; ++element) {
            auto params = coords;
            MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
            const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
            SetTemporal(bb, element, value);
        }
        for (u32 element = 0; element < 2; ++element) {
            SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
        }

        break;
    }
    case OpCode::Id::TLDS: {
        const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
        const bool is_array{instr.tlds.IsArrayTexture()};

        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");

        if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
        }

        WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled texture instruction: {}", opcode->get().GetName());
    }

    return pc;
}

const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
                                    bool is_array, bool is_shadow) {
    const auto offset = static_cast<std::size_t>(sampler.index.Value());

    // If this sampler has already been used, return the existing mapping.
    const auto itr =
        std::find_if(used_samplers.begin(), used_samplers.end(),
                     [&](const Sampler& entry) { return entry.GetOffset() == offset; });
    if (itr != used_samplers.end()) {
        ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
               itr->IsShadow() == is_shadow);
        return *itr;
    }

    // Otherwise create a new mapping for this sampler
    const std::size_t next_index = used_samplers.size();
    const Sampler entry{offset, next_index, type, is_array, is_shadow};
    return *used_samplers.emplace(entry).first;
}
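
The find-then-emplace sequence above is what keeps exactly one Sampler entry per const-buffer offset, no matter how many instructions reference it. A minimal standalone sketch of the same pattern, with a simplified Sampler record (the field names and the std::set keying here are illustrative, not yuzu's exact types):

#include <cassert>
#include <cstddef>
#include <set>

// Simplified stand-in for the Sampler entry used above.
struct Sampler {
    std::size_t offset; // Location encoded in the instruction.
    std::size_t index;  // Sequential index assigned on first use.
    bool operator<(const Sampler& rhs) const { return offset < rhs.offset; }
};

// Returns the existing entry for `offset`, or creates one with the next free index.
const Sampler& GetSampler(std::set<Sampler>& used_samplers, std::size_t offset) {
    const auto itr = used_samplers.find(Sampler{offset, 0});
    if (itr != used_samplers.end()) {
        return *itr;
    }
    return *used_samplers.emplace(Sampler{offset, used_samplers.size()}).first;
}

int main() {
    std::set<Sampler> used_samplers;
    const Sampler& a = GetSampler(used_samplers, 8); // First use: gets index 0.
    const Sampler& b = GetSampler(used_samplers, 8); // Reuse: the same entry is returned.
    assert(&a == &b && used_samplers.size() == 1);
}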

void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
    u32 dest_elem = 0;
    for (u32 elem = 0; elem < 4; ++elem) {
        if (!instr.tex.IsComponentEnabled(elem)) {
            // Skip disabled components
            continue;
        }
        SetTemporal(bb, dest_elem++, components[elem]);
    }
    // After writing values in temporals, move them to the real registers
    for (u32 i = 0; i < dest_elem; ++i) {
        SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
    }
}

void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
                                         const Node4& components) {
    // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
    // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1

    u32 dest_elem = 0;
    for (u32 component = 0; component < 4; ++component) {
        if (!instr.texs.IsComponentEnabled(component)) {
            continue;
        }
        SetTemporal(bb, dest_elem++, components[component]);
    }

    for (u32 i = 0; i < dest_elem; ++i) {
        if (i < 2) {
            // Write the first two swizzle components to gpr0 and gpr0+1
            SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
        } else {
            ASSERT(instr.texs.HasTwoDestinations());
            // Write the rest of the swizzle components to gpr28 and gpr28+1
            SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
        }
    }
}

void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
                                             const Node4& components) {
    // TEXS.F16 destination registers are packed in two registers in pairs (just like any half
    // float instruction).

    Node4 values;
    u32 dest_elem = 0;
    for (u32 component = 0; component < 4; ++component) {
        if (!instr.texs.IsComponentEnabled(component)) {
            continue;
        }
        values[dest_elem++] = components[component];
    }
    if (dest_elem == 0) {
        return;
    }

    std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });

    const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
    if (dest_elem <= 2) {
        SetRegister(bb, instr.gpr0, first_value);
        return;
    }

    SetTemporal(bb, 0, first_value);
    SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));

    SetRegister(bb, instr.gpr0, GetTemporal(0));
    SetRegister(bb, instr.gpr28, GetTemporal(1));
}
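
For reference, HPack2 above packs two half-precision values into one 32-bit register, first component in the low 16 bits. A tiny sketch of that bit layout, using raw 16-bit payloads rather than a real float-to-half conversion (the low-first ordering is the assumption being illustrated):

#include <cassert>
#include <cstdint>

// Packs two 16-bit payloads into one 32-bit word, first value in the low bits.
constexpr std::uint32_t HPack2(std::uint16_t low, std::uint16_t high) {
    return static_cast<std::uint32_t>(low) | (static_cast<std::uint32_t>(high) << 16);
}

int main() {
    // 0x3C00 is 1.0 and 0xC000 is -2.0 in IEEE half precision.
    const std::uint32_t packed = HPack2(0x3C00, 0xC000);
    assert(packed == 0xC0003C00u);
}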

Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, std::vector<Node> coords,
                               Node array, Node depth_compare, u32 bias_offset) {
    const bool is_array = array != nullptr;
    const bool is_shadow = depth_compare != nullptr;

    UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
                             (texture_type == TextureType::TextureCube && is_array && is_shadow),
                         "This sampler configuration is not supported");

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);

    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
                            process_mode == TextureProcessMode::LLA;

    // LOD selection (either via bias or explicit textureLod) is not supported in GL for
    // sampler2DArrayShadow and samplerCubeArrayShadow.
    const bool gl_lod_supported =
        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));

    const OperationCode read_method =
        (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;

    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);

    Node bias{};
    Node lod{};
    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
        switch (process_mode) {
        case TextureProcessMode::LZ:
            lod = Immediate(0.0f);
            break;
        case TextureProcessMode::LB:
            // If present, lod or bias are always stored in the register indexed by the gpr20
            // field with an offset depending on the usage of the other registers
            bias = GetRegister(instr.gpr20.Value() + bias_offset);
            break;
        case TextureProcessMode::LL:
            lod = GetRegister(instr.gpr20.Value() + bias_offset);
            break;
        default:
            UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
            break;
        }
    }

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto copy_coords = coords;
        MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
        values[element] = Operation(read_method, meta, std::move(copy_coords));
    }

    return values;
}
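
The switch above boils down to a small mapping: LZ forces an immediate lod of 0, while LB and LL read bias or lod from gpr20 plus bias_offset. A standalone sketch of that decision, with illustrative stand-in types rather than Tegra's actual encoding:

#include <cassert>
#include <cstdint>
#include <optional>

enum class ProcessMode { None, LZ, LB, LL };

struct LodSelection {
    std::optional<float> immediate_lod; // LZ forces lod 0.
    std::optional<std::uint64_t> bias_register;
    std::optional<std::uint64_t> lod_register;
};

// Mirrors the switch above: gpr20 (+ bias_offset) feeds either bias (LB) or lod (LL).
LodSelection SelectLod(ProcessMode mode, std::uint64_t gpr20, std::uint32_t bias_offset) {
    LodSelection out{};
    switch (mode) {
    case ProcessMode::LZ:
        out.immediate_lod = 0.0f;
        break;
    case ProcessMode::LB:
        out.bias_register = gpr20 + bias_offset;
        break;
    case ProcessMode::LL:
        out.lod_register = gpr20 + bias_offset;
        break;
    case ProcessMode::None:
        break;
    }
    return out;
}

int main() {
    assert(SelectLod(ProcessMode::LL, 20, 1).lod_register.value() == 21);
    assert(SelectLod(ProcessMode::LB, 20, 0).bias_register.value() == 20);
}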

Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
    // If arrays are enabled, the index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // The first coordinate index is gpr8, or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }
    // For 1D.DC, OpenGL ignores the 2nd component.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        coords.push_back(Immediate(0.0f));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;

    Node dc{};
    if (depth_compare) {
        // Depth is always stored in the register indicated by gpr20, or in the next register
        // when lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        dc = GetRegister(depth_register);
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
}

Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
    // If arrays are enabled, the index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // The first coordinate index is stored in gpr8, or in gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);
    const u64 last_coord_register =
        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;
    const u32 bias_offset = coord_count > 2 ? 1 : 0;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        const bool last = (i == (coord_count - 1)) && (coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;

    Node dc{};
    if (depth_compare) {
        // Depth is always stored in the register indicated by gpr20, or in the next register
        // when lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        dc = GetRegister(depth_register);
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
}

Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                            bool is_array) {
    const std::size_t coord_count = GetCoordCount(texture_type);
    const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
    const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);

    // If arrays are enabled, the index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // The first coordinate index is gpr8, or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
    }

    return values;
}
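
For context, a texture gather (what OperationCode::TextureGather models) returns one selected component from each of the four texels of the bilinear footprint instead of a filtered value. A self-contained sketch of that access pattern on a tiny single-channel image, following the return order GLSL documents for textureGather (the layout here is illustrative):

#include <array>
#include <cassert>
#include <cstddef>

// Gathers the 2x2 texel footprint whose top-left corner is (x, y).
// Return order (x,y+1), (x+1,y+1), (x+1,y), (x,y) matches GLSL's textureGather.
template <std::size_t W, std::size_t H>
std::array<float, 4> Gather(const std::array<std::array<float, W>, H>& image, std::size_t x,
                            std::size_t y) {
    return {image[y + 1][x], image[y + 1][x + 1], image[y][x + 1], image[y][x]};
}

int main() {
    const std::array<std::array<float, 2>, 2> image{{{0.0f, 1.0f}, {2.0f, 3.0f}}};
    const auto texels = Gather(image, 0, 0);
    assert(texels[0] == 2.0f && texels[3] == 0.0f);
}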

Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
    const std::size_t type_coord_count = GetCoordCount(texture_type);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;

    // If arrays are enabled, the index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // If this is an array texture, the coordinates start at gpr20
    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();

    const u64 last_coord_register =
        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < type_coord_count; ++i) {
        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;
    // When lod is used, it is always stored in gpr20
    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }
    return values;
}

std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
    std::size_t max_coords, std::size_t max_inputs) {
    const std::size_t coord_count = GetCoordCount(texture_type);

    std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
    const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
    if (total_coord_count > max_coords || total_reg_count > max_inputs) {
        UNIMPLEMENTED_MSG("Unsupported Texture operation");
        total_coord_count = std::min(total_coord_count, max_coords);
    }
    // For 1D.DC, OpenGL uses a vec3, but the 2nd component is ignored later.
    total_coord_count +=
        (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;

    return {coord_count, total_coord_count};
}

} // namespace VideoCommon::Shader
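
As a worked example of the arithmetic above: a TEX on a 2D array texture with depth compare and lod/bias enabled needs 2 coordinates plus 1 array index plus 1 reference depth, so total_coord_count is 4 and total_reg_count is 5, exactly the (4, 5) limits GetTexCode passes in. A trivial check:

#include <cassert>
#include <cstddef>

int main() {
    // Texture2D array with depth compare and lod/bias: the TEX limits (4, 5) are just met.
    const std::size_t coord_count = 2; // GetCoordCount(Texture2D)
    const std::size_t total_coord_count = coord_count + 1 /*array*/ + 1 /*depth compare*/;
    const std::size_t total_reg_count = total_coord_count + 1 /*lod or bias*/;
    assert(total_coord_count == 4 && total_reg_count == 5);
}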

@@ -290,7 +290,9 @@ struct MetaTexture {
    const Sampler& sampler;
    Node array{};
    Node depth_compare{};
    std::vector<Node> extras;
    Node bias{};
    Node lod{};
    Node component{};
    u32 element{};
};
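
The hunk above replaces the untyped extras vector with named bias, lod, and component slots, which is why the call sites in texture.cpp now pass seven initializers. A minimal sketch of how the new aggregate is populated, with stand-in types (Node here is just a placeholder handle, not the real IR node):

#include <cstdint>

struct Sampler {};
using Node = const void*; // Stand-in for the IR node handle.

struct MetaTexture {
    const Sampler& sampler;
    Node array{};
    Node depth_compare{};
    Node bias{};
    Node lod{};
    Node component{};
    std::uint32_t element{};
};

int main() {
    const Sampler sampler{};
    Node lod = nullptr; // e.g. GetRegister(instr.gpr20) in GetTldsCode
    // TLDS: array and lod are set, everything else stays empty.
    MetaTexture meta{sampler, nullptr, nullptr, nullptr, lod, nullptr, 0};
    (void)meta;
}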
@@ -614,6 +616,7 @@ private:
    u32 DecodeHfma2(NodeBlock& bb, u32 pc);
    u32 DecodeConversion(NodeBlock& bb, u32 pc);
    u32 DecodeMemory(NodeBlock& bb, u32 pc);
    u32 DecodeTexture(NodeBlock& bb, u32 pc);
    u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
    u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
    u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
@@ -4,6 +4,8 @@

#pragma once

#include <functional>
#include <future>
#include <string>

namespace WebService {
@@ -10,6 +10,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/web_result.h"
#include "core/settings.h"
#include "web_service/web_backend.h"

namespace WebService {
@@ -61,7 +61,7 @@ void CompatDB::Submit() {
        button(QWizard::CancelButton)->setVisible(false);

        testcase_watcher.setFuture(QtConcurrent::run(
            [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
            [this]() { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
        break;
    default:
        LOG_ERROR(Frontend, "Unexpected page: {}", currentId());
@@ -398,7 +398,7 @@ void GraphicsSurfaceWidget::OnUpdate() {

    for (unsigned int y = 0; y < surface_height; ++y) {
        for (unsigned int x = 0; x < surface_width; ++x) {
            Common::Vec4<u8> color;
            Math::Vec4<u8> color;
            color[0] = texture_data[x + y * surface_width + 0];
            color[1] = texture_data[x + y * surface_width + 1];
            color[2] = texture_data[x + y * surface_width + 2];