Compare commits
9 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9a8c1745f1 | ||
|
|
f96d50165f | ||
|
|
f9f541470e | ||
|
|
ce691745dc | ||
|
|
1d59af8f7c | ||
|
|
a6252257eb | ||
|
|
dc5e5ac3b0 | ||
|
|
4f612052b2 | ||
|
|
89eef17670 |
@@ -132,7 +132,7 @@ find_package(Threads REQUIRED)
|
||||
if (ENABLE_SDL2)
|
||||
if (YUZU_USE_BUNDLED_SDL2)
|
||||
# Detect toolchain and platform
|
||||
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
|
||||
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
|
||||
set(SDL2_VER "SDL2-2.0.8")
|
||||
else()
|
||||
message(FATAL_ERROR "No bundled SDL2 binaries for your toolchain. Disable YUZU_USE_BUNDLED_SDL2 and provide your own.")
|
||||
@@ -165,7 +165,7 @@ if (YUZU_USE_BUNDLED_UNICORN)
|
||||
if (MSVC)
|
||||
message(STATUS "unicorn not found, falling back to bundled")
|
||||
# Detect toolchain and platform
|
||||
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
|
||||
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
|
||||
set(UNICORN_VER "unicorn-yuzu")
|
||||
else()
|
||||
message(FATAL_ERROR "No bundled Unicorn binaries for your toolchain. Disable YUZU_USE_BUNDLED_UNICORN and provide your own.")
|
||||
@@ -233,7 +233,7 @@ endif()
|
||||
|
||||
if (ENABLE_QT)
|
||||
if (YUZU_USE_BUNDLED_QT)
|
||||
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
|
||||
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
|
||||
set(QT_VER qt-5.12.0-msvc2017_64)
|
||||
else()
|
||||
message(FATAL_ERROR "No bundled Qt binaries for your toolchain. Disable YUZU_USE_BUNDLED_QT and provide your own.")
|
||||
|
||||
@@ -70,6 +70,7 @@ set(HASH_FILES
|
||||
"${VIDEO_CORE}/shader/decode/half_set.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/hfma2.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/image.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/memory.cpp"
|
||||
|
||||
12
externals/glad/include/KHR/khrplatform.h
vendored
12
externals/glad/include/KHR/khrplatform.h
vendored
@@ -90,20 +90,12 @@
|
||||
* int arg2) KHRONOS_APIATTRIBUTES;
|
||||
*/
|
||||
|
||||
#if defined(__SCITECH_SNAP__) && !defined(KHRONOS_STATIC)
|
||||
# define KHRONOS_STATIC 1
|
||||
#endif
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Definition of KHRONOS_APICALL
|
||||
*-------------------------------------------------------------------------
|
||||
* This precedes the return type of the function in the function prototype.
|
||||
*/
|
||||
#if defined(KHRONOS_STATIC)
|
||||
/* If the preprocessor constant KHRONOS_STATIC is defined, make the
|
||||
* header compatible with static linking. */
|
||||
# define KHRONOS_APICALL
|
||||
#elif defined(_WIN32)
|
||||
#if defined(_WIN32) && !defined(__SCITECH_SNAP__)
|
||||
# define KHRONOS_APICALL __declspec(dllimport)
|
||||
#elif defined (__SYMBIAN32__)
|
||||
# define KHRONOS_APICALL IMPORT_C
|
||||
@@ -119,7 +111,7 @@
|
||||
* This follows the return type of the function and precedes the function
|
||||
* name in the function prototype.
|
||||
*/
|
||||
#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(KHRONOS_STATIC)
|
||||
#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__)
|
||||
/* Win32 but not WinCE */
|
||||
# define KHRONOS_APIENTRY __stdcall
|
||||
#else
|
||||
|
||||
2388
externals/glad/include/glad/glad.h
vendored
2388
externals/glad/include/glad/glad.h
vendored
File diff suppressed because one or more lines are too long
1125
externals/glad/src/glad.c
vendored
1125
externals/glad/src/glad.c
vendored
File diff suppressed because one or more lines are too long
@@ -44,6 +44,7 @@ add_custom_command(OUTPUT scm_rev.cpp
|
||||
"${VIDEO_CORE}/shader/decode/half_set.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/hfma2.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/image.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/memory.cpp"
|
||||
|
||||
@@ -438,7 +438,7 @@ inline float RequestParser::Pop() {
|
||||
template <>
|
||||
inline double RequestParser::Pop() {
|
||||
const u64 value = Pop<u64>();
|
||||
double real;
|
||||
float real;
|
||||
std::memcpy(&real, &value, sizeof(real));
|
||||
return real;
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ void SessionRequestHandler::ClientDisconnected(const SharedPtr<ServerSession>& s
|
||||
}
|
||||
|
||||
SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
|
||||
const std::string& reason, u64 timeout, WakeupCallback&& callback,
|
||||
SharedPtr<Thread> thread, const std::string& reason, u64 timeout, WakeupCallback&& callback,
|
||||
SharedPtr<WritableEvent> writable_event) {
|
||||
// Put the client thread to sleep until the wait event is signaled or the timeout expires.
|
||||
thread->SetWakeupCallback([context = *this, callback](
|
||||
@@ -58,7 +58,7 @@ SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
if (!writable_event) {
|
||||
// Create event if not provided
|
||||
const auto pair = WritableEvent::CreateEventPair(kernel, ResetType::Automatic,
|
||||
const auto pair = WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
|
||||
"HLE Pause Event: " + reason);
|
||||
writable_event = pair.writable;
|
||||
}
|
||||
@@ -76,9 +76,8 @@ SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
|
||||
return writable_event;
|
||||
}
|
||||
|
||||
HLERequestContext::HLERequestContext(SharedPtr<Kernel::ServerSession> server_session,
|
||||
SharedPtr<Thread> thread)
|
||||
: server_session(std::move(server_session)), thread(std::move(thread)) {
|
||||
HLERequestContext::HLERequestContext(SharedPtr<Kernel::ServerSession> server_session)
|
||||
: server_session(std::move(server_session)) {
|
||||
cmd_buf[0] = 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ protected:
|
||||
*/
|
||||
class HLERequestContext {
|
||||
public:
|
||||
explicit HLERequestContext(SharedPtr<ServerSession> session, SharedPtr<Thread> thread);
|
||||
explicit HLERequestContext(SharedPtr<ServerSession> session);
|
||||
~HLERequestContext();
|
||||
|
||||
/// Returns a pointer to the IPC command buffer for this request.
|
||||
@@ -119,6 +119,7 @@ public:
|
||||
/**
|
||||
* Puts the specified guest thread to sleep until the returned event is signaled or until the
|
||||
* specified timeout expires.
|
||||
* @param thread Thread to be put to sleep.
|
||||
* @param reason Reason for pausing the thread, to be used for debugging purposes.
|
||||
* @param timeout Timeout in nanoseconds after which the thread will be awoken and the callback
|
||||
* invoked with a Timeout reason.
|
||||
@@ -129,8 +130,8 @@ public:
|
||||
* created.
|
||||
* @returns Event that when signaled will resume the thread and call the callback function.
|
||||
*/
|
||||
SharedPtr<WritableEvent> SleepClientThread(const std::string& reason, u64 timeout,
|
||||
WakeupCallback&& callback,
|
||||
SharedPtr<WritableEvent> SleepClientThread(SharedPtr<Thread> thread, const std::string& reason,
|
||||
u64 timeout, WakeupCallback&& callback,
|
||||
SharedPtr<WritableEvent> writable_event = nullptr);
|
||||
|
||||
/// Populates this context with data from the requesting process/thread.
|
||||
@@ -267,7 +268,6 @@ private:
|
||||
|
||||
std::array<u32, IPC::COMMAND_BUFFER_LENGTH> cmd_buf;
|
||||
SharedPtr<Kernel::ServerSession> server_session;
|
||||
SharedPtr<Thread> thread;
|
||||
// TODO(yuriks): Check common usage of this and optimize size accordingly
|
||||
boost::container::small_vector<SharedPtr<Object>, 8> move_objects;
|
||||
boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
|
||||
|
||||
@@ -33,8 +33,8 @@ enum class HandleType : u32 {
|
||||
};
|
||||
|
||||
enum class ResetType {
|
||||
Automatic, ///< Reset automatically on object acquisition
|
||||
Manual, ///< Never reset automatically
|
||||
OneShot, ///< Reset automatically on object acquisition
|
||||
Sticky, ///< Never reset automatically
|
||||
};
|
||||
|
||||
class Object : NonCopyable {
|
||||
|
||||
@@ -21,9 +21,8 @@ bool ReadableEvent::ShouldWait(const Thread* thread) const {
|
||||
void ReadableEvent::Acquire(Thread* thread) {
|
||||
ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
|
||||
|
||||
if (reset_type == ResetType::Automatic) {
|
||||
if (reset_type == ResetType::OneShot)
|
||||
signaled = false;
|
||||
}
|
||||
}
|
||||
|
||||
void ReadableEvent::Signal() {
|
||||
|
||||
@@ -130,7 +130,7 @@ ResultCode ServerSession::HandleSyncRequest(SharedPtr<Thread> thread) {
|
||||
// The ServerSession received a sync request, this means that there's new data available
|
||||
// from its ClientSession, so wake up any threads that may be waiting on a svcReplyAndReceive or
|
||||
// similar.
|
||||
Kernel::HLERequestContext context(this, thread);
|
||||
Kernel::HLERequestContext context(this);
|
||||
u32* cmd_buf = (u32*)Memory::GetPointer(thread->GetTLSAddress());
|
||||
context.PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf);
|
||||
|
||||
|
||||
@@ -1255,8 +1255,8 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
|
||||
return vm_manager.MapCodeMemory(dst_address, src_address, size);
|
||||
}
|
||||
|
||||
static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle,
|
||||
u64 dst_address, u64 src_address, u64 size) {
|
||||
ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address,
|
||||
u64 src_address, u64 size) {
|
||||
LOG_DEBUG(Kernel_SVC,
|
||||
"called. process_handle=0x{:08X}, dst_address=0x{:016X}, src_address=0x{:016X}, "
|
||||
"size=0x{:016X}",
|
||||
@@ -1342,7 +1342,7 @@ static void ExitProcess(Core::System& system) {
|
||||
/// Creates a new thread
|
||||
static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr entry_point, u64 arg,
|
||||
VAddr stack_top, u32 priority, s32 processor_id) {
|
||||
LOG_DEBUG(Kernel_SVC,
|
||||
LOG_TRACE(Kernel_SVC,
|
||||
"called entrypoint=0x{:08X}, arg=0x{:08X}, stacktop=0x{:08X}, "
|
||||
"threadpriority=0x{:08X}, processorid=0x{:08X} : created handle=0x{:08X}",
|
||||
entry_point, arg, stack_top, priority, processor_id, *out_handle);
|
||||
@@ -1402,7 +1402,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
|
||||
|
||||
/// Starts the thread for the provided handle
|
||||
static ResultCode StartThread(Core::System& system, Handle thread_handle) {
|
||||
LOG_DEBUG(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
|
||||
LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
|
||||
|
||||
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
|
||||
const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
|
||||
@@ -1425,7 +1425,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {
|
||||
|
||||
/// Called when a thread exits
|
||||
static void ExitThread(Core::System& system) {
|
||||
LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
|
||||
LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
|
||||
|
||||
auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
|
||||
current_thread->Stop();
|
||||
@@ -1435,7 +1435,7 @@ static void ExitThread(Core::System& system) {
|
||||
|
||||
/// Sleep the current thread
|
||||
static void SleepThread(Core::System& system, s64 nanoseconds) {
|
||||
LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds);
|
||||
LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
|
||||
|
||||
enum class SleepType : s64 {
|
||||
YieldWithoutLoadBalancing = 0,
|
||||
@@ -1880,51 +1880,11 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,
|
||||
}
|
||||
|
||||
static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core,
|
||||
u64 affinity_mask) {
|
||||
LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, core=0x{:X}, affinity_mask=0x{:016X}",
|
||||
thread_handle, core, affinity_mask);
|
||||
u64 mask) {
|
||||
LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:016X}, core=0x{:X}", thread_handle,
|
||||
mask, core);
|
||||
|
||||
const auto* const current_process = system.Kernel().CurrentProcess();
|
||||
|
||||
if (core == static_cast<u32>(THREADPROCESSORID_IDEAL)) {
|
||||
const u8 ideal_cpu_core = current_process->GetIdealCore();
|
||||
|
||||
ASSERT(ideal_cpu_core != static_cast<u8>(THREADPROCESSORID_IDEAL));
|
||||
|
||||
// Set the target CPU to the ideal core specified by the process.
|
||||
core = ideal_cpu_core;
|
||||
affinity_mask = 1ULL << core;
|
||||
} else {
|
||||
const u64 core_mask = current_process->GetCoreMask();
|
||||
|
||||
if ((core_mask | affinity_mask) != core_mask) {
|
||||
LOG_ERROR(
|
||||
Kernel_SVC,
|
||||
"Invalid processor ID specified (core_mask=0x{:08X}, affinity_mask=0x{:016X})",
|
||||
core_mask, affinity_mask);
|
||||
return ERR_INVALID_PROCESSOR_ID;
|
||||
}
|
||||
|
||||
if (affinity_mask == 0) {
|
||||
LOG_ERROR(Kernel_SVC, "Specfified affinity mask is zero.");
|
||||
return ERR_INVALID_COMBINATION;
|
||||
}
|
||||
|
||||
if (core < Core::NUM_CPU_CORES) {
|
||||
if ((affinity_mask & (1ULL << core)) == 0) {
|
||||
LOG_ERROR(Kernel_SVC,
|
||||
"Core is not enabled for the current mask, core={}, mask={:016X}", core,
|
||||
affinity_mask);
|
||||
return ERR_INVALID_COMBINATION;
|
||||
}
|
||||
} else if (core != static_cast<u32>(THREADPROCESSORID_DONT_CARE) &&
|
||||
core != static_cast<u32>(THREADPROCESSORID_DONT_UPDATE)) {
|
||||
LOG_ERROR(Kernel_SVC, "Invalid processor ID specified (core={}).", core);
|
||||
return ERR_INVALID_PROCESSOR_ID;
|
||||
}
|
||||
}
|
||||
|
||||
const auto& handle_table = current_process->GetHandleTable();
|
||||
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
|
||||
const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
|
||||
if (!thread) {
|
||||
LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
|
||||
@@ -1932,7 +1892,40 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
|
||||
return ERR_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
thread->ChangeCore(core, affinity_mask);
|
||||
if (core == static_cast<u32>(THREADPROCESSORID_IDEAL)) {
|
||||
const u8 ideal_cpu_core = thread->GetOwnerProcess()->GetIdealCore();
|
||||
|
||||
ASSERT(ideal_cpu_core != static_cast<u8>(THREADPROCESSORID_IDEAL));
|
||||
|
||||
// Set the target CPU to the ideal core specified by the process.
|
||||
core = ideal_cpu_core;
|
||||
mask = 1ULL << core;
|
||||
}
|
||||
|
||||
if (mask == 0) {
|
||||
LOG_ERROR(Kernel_SVC, "Mask is 0");
|
||||
return ERR_INVALID_COMBINATION;
|
||||
}
|
||||
|
||||
/// This value is used to only change the affinity mask without changing the current ideal core.
|
||||
static constexpr u32 OnlyChangeMask = static_cast<u32>(-3);
|
||||
|
||||
if (core == OnlyChangeMask) {
|
||||
core = thread->GetIdealCore();
|
||||
} else if (core >= Core::NUM_CPU_CORES && core != static_cast<u32>(-1)) {
|
||||
LOG_ERROR(Kernel_SVC, "Invalid core specified, got {}", core);
|
||||
return ERR_INVALID_PROCESSOR_ID;
|
||||
}
|
||||
|
||||
// Error out if the input core isn't enabled in the input mask.
|
||||
if (core < Core::NUM_CPU_CORES && (mask & (1ull << core)) == 0) {
|
||||
LOG_ERROR(Kernel_SVC, "Core is not enabled for the current mask, core={}, mask={:016X}",
|
||||
core, mask);
|
||||
return ERR_INVALID_COMBINATION;
|
||||
}
|
||||
|
||||
thread->ChangeCore(core, mask);
|
||||
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -1987,7 +1980,7 @@ static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle
|
||||
|
||||
auto& kernel = system.Kernel();
|
||||
const auto [readable_event, writable_event] =
|
||||
WritableEvent::CreateEventPair(kernel, ResetType::Manual, "CreateEvent");
|
||||
WritableEvent::CreateEventPair(kernel, ResetType::Sticky, "CreateEvent");
|
||||
|
||||
HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable();
|
||||
|
||||
@@ -2190,8 +2183,8 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids,
|
||||
u32 out_thread_ids_size, Handle debug_handle) {
|
||||
ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids,
|
||||
u32 out_thread_ids_size, Handle debug_handle) {
|
||||
// TODO: Handle this case when debug events are supported.
|
||||
UNIMPLEMENTED_IF(debug_handle != InvalidHandle);
|
||||
|
||||
|
||||
@@ -30,21 +30,12 @@ enum ThreadPriority : u32 {
|
||||
};
|
||||
|
||||
enum ThreadProcessorId : s32 {
|
||||
/// Indicates that no particular processor core is preferred.
|
||||
THREADPROCESSORID_DONT_CARE = -1,
|
||||
|
||||
/// Run thread on the ideal core specified by the process.
|
||||
THREADPROCESSORID_IDEAL = -2,
|
||||
|
||||
/// Indicates that the preferred processor ID shouldn't be updated in
|
||||
/// a core mask setting operation.
|
||||
THREADPROCESSORID_DONT_UPDATE = -3,
|
||||
|
||||
THREADPROCESSORID_0 = 0, ///< Run thread on core 0
|
||||
THREADPROCESSORID_1 = 1, ///< Run thread on core 1
|
||||
THREADPROCESSORID_2 = 2, ///< Run thread on core 2
|
||||
THREADPROCESSORID_3 = 3, ///< Run thread on core 3
|
||||
THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this
|
||||
THREADPROCESSORID_IDEAL = -2, ///< Run thread on the ideal core specified by the process.
|
||||
THREADPROCESSORID_0 = 0, ///< Run thread on core 0
|
||||
THREADPROCESSORID_1 = 1, ///< Run thread on core 1
|
||||
THREADPROCESSORID_2 = 2, ///< Run thread on core 2
|
||||
THREADPROCESSORID_3 = 3, ///< Run thread on core 3
|
||||
THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this
|
||||
|
||||
/// Allowed CPU mask
|
||||
THREADPROCESSORID_DEFAULT_MASK = (1 << THREADPROCESSORID_0) | (1 << THREADPROCESSORID_1) |
|
||||
|
||||
@@ -276,7 +276,7 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
|
||||
RegisterHandlers(functions);
|
||||
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
|
||||
launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
|
||||
"ISelfController:LaunchableEvent");
|
||||
}
|
||||
|
||||
@@ -442,10 +442,10 @@ void ISelfController::GetIdleTimeDetectionExtension(Kernel::HLERequestContext& c
|
||||
|
||||
AppletMessageQueue::AppletMessageQueue() {
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
|
||||
on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
|
||||
"AMMessageQueue:OnMessageRecieved");
|
||||
on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair(
|
||||
kernel, Kernel::ResetType::Automatic, "AMMessageQueue:OperationModeChanged");
|
||||
kernel, Kernel::ResetType::OneShot, "AMMessageQueue:OperationModeChanged");
|
||||
}
|
||||
|
||||
AppletMessageQueue::~AppletMessageQueue() = default;
|
||||
@@ -835,7 +835,6 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
|
||||
return;
|
||||
}
|
||||
|
||||
std::memcpy(backing.buffer.data() + offset, data.data(), data.size());
|
||||
@@ -858,7 +857,6 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
|
||||
return;
|
||||
}
|
||||
|
||||
ctx.WriteBuffer(backing.buffer.data() + offset, size);
|
||||
|
||||
@@ -26,11 +26,11 @@ namespace Service::AM::Applets {
|
||||
AppletDataBroker::AppletDataBroker() {
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
state_changed_event = Kernel::WritableEvent::CreateEventPair(
|
||||
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent");
|
||||
kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:StateChangedEvent");
|
||||
pop_out_data_event = Kernel::WritableEvent::CreateEventPair(
|
||||
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopDataOutEvent");
|
||||
kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:PopDataOutEvent");
|
||||
pop_interactive_out_data_event = Kernel::WritableEvent::CreateEventPair(
|
||||
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
|
||||
kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
|
||||
}
|
||||
|
||||
AppletDataBroker::~AppletDataBroker() = default;
|
||||
|
||||
@@ -68,7 +68,7 @@ AOC_U::AOC_U() : ServiceFramework("aoc:u"), add_on_content(AccumulateAOCTitleIDs
|
||||
RegisterHandlers(functions);
|
||||
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
aoc_change_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
|
||||
aoc_change_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
|
||||
"GetAddOnContentListChanged:Event");
|
||||
}
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ public:
|
||||
// This is the event handle used to check if the audio buffer was released
|
||||
auto& system = Core::System::GetInstance();
|
||||
buffer_event = Kernel::WritableEvent::CreateEventPair(
|
||||
system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased");
|
||||
system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
|
||||
|
||||
stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
|
||||
audio_params.channel_count, std::move(unique_name),
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
|
||||
#include "audio_core/audio_renderer.h"
|
||||
#include "common/alignment.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/string_util.h"
|
||||
@@ -47,7 +46,7 @@ public:
|
||||
|
||||
auto& system = Core::System::GetInstance();
|
||||
system_event = Kernel::WritableEvent::CreateEventPair(
|
||||
system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent");
|
||||
system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
|
||||
renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
|
||||
system_event.writable);
|
||||
}
|
||||
@@ -179,7 +178,7 @@ public:
|
||||
RegisterHandlers(functions);
|
||||
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
|
||||
buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
|
||||
"IAudioOutBufferReleasedEvent");
|
||||
}
|
||||
|
||||
@@ -263,304 +262,64 @@ void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
|
||||
OpenAudioRendererImpl(ctx);
|
||||
}
|
||||
|
||||
static u64 CalculateNumPerformanceEntries(const AudioCore::AudioRendererParameter& params) {
|
||||
// +1 represents the final mix.
|
||||
return u64{params.effect_count} + params.submix_count + params.sink_count + params.voice_count +
|
||||
1;
|
||||
}
|
||||
|
||||
void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp{ctx};
|
||||
auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
|
||||
LOG_DEBUG(Service_Audio, "called");
|
||||
|
||||
// Several calculations below align the sizes being calculated
|
||||
// onto a 64 byte boundary.
|
||||
static constexpr u64 buffer_alignment_size = 64;
|
||||
u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
|
||||
buffer_sz += params.submix_count * 1024;
|
||||
buffer_sz += 0x940 * (params.submix_count + 1);
|
||||
buffer_sz += 0x3F0 * params.voice_count;
|
||||
buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
|
||||
buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
|
||||
buffer_sz += Common::AlignUp(
|
||||
(0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
|
||||
(params.mix_buffer_count + 6),
|
||||
0x40);
|
||||
|
||||
// Some calculations that calculate portions of the buffer
|
||||
// that will contain information, on the other hand, align
|
||||
// the result of some of their calcularions on a 16 byte boundary.
|
||||
static constexpr u64 info_field_alignment_size = 16;
|
||||
|
||||
// Maximum detail entries that may exist at one time for performance
|
||||
// frame statistics.
|
||||
static constexpr u64 max_perf_detail_entries = 100;
|
||||
|
||||
// Size of the data structure representing the bulk of the voice-related state.
|
||||
static constexpr u64 voice_state_size = 0x100;
|
||||
|
||||
// Size of the upsampler manager data structure
|
||||
constexpr u64 upsampler_manager_size = 0x48;
|
||||
|
||||
// Calculates the part of the size that relates to mix buffers.
|
||||
const auto calculate_mix_buffer_sizes = [](const AudioCore::AudioRendererParameter& params) {
|
||||
// As of 8.0.0 this is the maximum on voice channels.
|
||||
constexpr u64 max_voice_channels = 6;
|
||||
|
||||
// The service expects the sample_count member of the parameters to either be
|
||||
// a value of 160 or 240, so the maximum sample count is assumed in order
|
||||
// to adequately handle all values at runtime.
|
||||
constexpr u64 default_max_sample_count = 240;
|
||||
|
||||
const u64 total_mix_buffers = params.mix_buffer_count + max_voice_channels;
|
||||
|
||||
u64 size = 0;
|
||||
size += total_mix_buffers * (sizeof(s32) * params.sample_count);
|
||||
size += total_mix_buffers * (sizeof(s32) * default_max_sample_count);
|
||||
size += u64{params.submix_count} + params.sink_count;
|
||||
size = Common::AlignUp(size, buffer_alignment_size);
|
||||
size += Common::AlignUp(params.unknown_30, buffer_alignment_size);
|
||||
size += Common::AlignUp(sizeof(s32) * params.mix_buffer_count, buffer_alignment_size);
|
||||
return size;
|
||||
};
|
||||
|
||||
// Calculates the portion of the size related to the mix data (and the sorting thereof).
|
||||
const auto calculate_mix_info_size = [this](const AudioCore::AudioRendererParameter& params) {
|
||||
// The size of the mixing info data structure.
|
||||
constexpr u64 mix_info_size = 0x940;
|
||||
|
||||
// Consists of total submixes with the final mix included.
|
||||
const u64 total_mix_count = u64{params.submix_count} + 1;
|
||||
|
||||
// The total number of effects that may be available to the audio renderer at any time.
|
||||
constexpr u64 max_effects = 256;
|
||||
|
||||
// Calculates the part of the size related to the audio node state.
|
||||
// This will only be used if the audio revision supports the splitter.
|
||||
const auto calculate_node_state_size = [](std::size_t num_nodes) {
|
||||
// Internally within a nodestate, it appears to use a data structure
|
||||
// similar to a std::bitset<64> twice.
|
||||
constexpr u64 bit_size = Common::BitSize<u64>();
|
||||
constexpr u64 num_bitsets = 2;
|
||||
|
||||
// Node state instances have three states internally for performing
|
||||
// depth-first searches of nodes. Initialized, Found, and Done Sorting.
|
||||
constexpr u64 num_states = 3;
|
||||
|
||||
u64 size = 0;
|
||||
size += (num_nodes * num_nodes) * sizeof(s32);
|
||||
size += num_states * (num_nodes * sizeof(s32));
|
||||
size += num_bitsets * (Common::AlignUp(num_nodes, bit_size) / Common::BitSize<u8>());
|
||||
return size;
|
||||
};
|
||||
|
||||
// Calculates the part of the size related to the adjacency (aka edge) matrix.
|
||||
const auto calculate_edge_matrix_size = [](std::size_t num_nodes) {
|
||||
return (num_nodes * num_nodes) * sizeof(s32);
|
||||
};
|
||||
|
||||
u64 size = 0;
|
||||
size += Common::AlignUp(sizeof(void*) * total_mix_count, info_field_alignment_size);
|
||||
size += Common::AlignUp(mix_info_size * total_mix_count, info_field_alignment_size);
|
||||
size += Common::AlignUp(sizeof(s32) * max_effects * params.submix_count,
|
||||
info_field_alignment_size);
|
||||
|
||||
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
|
||||
size += Common::AlignUp(calculate_node_state_size(total_mix_count) +
|
||||
calculate_edge_matrix_size(total_mix_count),
|
||||
info_field_alignment_size);
|
||||
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
|
||||
const u32 count = params.submix_count + 1;
|
||||
u64 node_count = Common::AlignUp(count, 0x40);
|
||||
const u64 node_state_buffer_sz =
|
||||
4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
|
||||
u64 edge_matrix_buffer_sz = 0;
|
||||
node_count = Common::AlignUp(count * count, 0x40);
|
||||
if (node_count >> 31 != 0) {
|
||||
edge_matrix_buffer_sz = (node_count | 7) / 8;
|
||||
} else {
|
||||
edge_matrix_buffer_sz = node_count / 8;
|
||||
}
|
||||
buffer_sz += Common::AlignUp(node_state_buffer_sz + edge_matrix_buffer_sz, 0x10);
|
||||
}
|
||||
|
||||
return size;
|
||||
};
|
||||
buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
|
||||
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
|
||||
buffer_sz += 0xE0 * params.num_splitter_send_channels;
|
||||
buffer_sz += 0x20 * params.splitter_count;
|
||||
buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
|
||||
}
|
||||
buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
|
||||
u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
|
||||
((params.voice_count * 256) | 0x40);
|
||||
|
||||
// Calculates the part of the size related to voice channel info.
|
||||
const auto calculate_voice_info_size = [](const AudioCore::AudioRendererParameter& params) {
|
||||
constexpr u64 voice_info_size = 0x220;
|
||||
constexpr u64 voice_resource_size = 0xD0;
|
||||
|
||||
u64 size = 0;
|
||||
size += Common::AlignUp(sizeof(void*) * params.voice_count, info_field_alignment_size);
|
||||
size += Common::AlignUp(voice_info_size * params.voice_count, info_field_alignment_size);
|
||||
size +=
|
||||
Common::AlignUp(voice_resource_size * params.voice_count, info_field_alignment_size);
|
||||
size += Common::AlignUp(voice_state_size * params.voice_count, info_field_alignment_size);
|
||||
return size;
|
||||
};
|
||||
|
||||
// Calculates the part of the size related to memory pools.
|
||||
const auto calculate_memory_pools_size = [](const AudioCore::AudioRendererParameter& params) {
|
||||
const u64 num_memory_pools = sizeof(s32) * (u64{params.effect_count} + params.voice_count);
|
||||
const u64 memory_pool_info_size = 0x20;
|
||||
return Common::AlignUp(num_memory_pools * memory_pool_info_size, info_field_alignment_size);
|
||||
};
|
||||
|
||||
// Calculates the part of the size related to the splitter context.
|
||||
const auto calculate_splitter_context_size =
|
||||
[this](const AudioCore::AudioRendererParameter& params) -> u64 {
|
||||
if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
constexpr u64 splitter_info_size = 0x20;
|
||||
constexpr u64 splitter_destination_data_size = 0xE0;
|
||||
|
||||
u64 size = 0;
|
||||
size += params.num_splitter_send_channels;
|
||||
size +=
|
||||
Common::AlignUp(splitter_info_size * params.splitter_count, info_field_alignment_size);
|
||||
size += Common::AlignUp(splitter_destination_data_size * params.num_splitter_send_channels,
|
||||
info_field_alignment_size);
|
||||
|
||||
return size;
|
||||
};
|
||||
|
||||
// Calculates the part of the size related to the upsampler info.
|
||||
const auto calculate_upsampler_info_size = [](const AudioCore::AudioRendererParameter& params) {
|
||||
constexpr u64 upsampler_info_size = 0x280;
|
||||
// Yes, using the buffer size over info alignment size is intentional here.
|
||||
return Common::AlignUp(upsampler_info_size * (u64{params.submix_count} + params.sink_count),
|
||||
buffer_alignment_size);
|
||||
};
|
||||
|
||||
// Calculates the part of the size related to effect info.
|
||||
const auto calculate_effect_info_size = [](const AudioCore::AudioRendererParameter& params) {
|
||||
constexpr u64 effect_info_size = 0x2B0;
|
||||
return Common::AlignUp(effect_info_size * params.effect_count, info_field_alignment_size);
|
||||
};
|
||||
|
||||
// Calculates the part of the size related to audio sink info.
|
||||
const auto calculate_sink_info_size = [](const AudioCore::AudioRendererParameter& params) {
|
||||
const u64 sink_info_size = 0x170;
|
||||
return Common::AlignUp(sink_info_size * params.sink_count, info_field_alignment_size);
|
||||
};
|
||||
|
||||
// Calculates the part of the size related to voice state info.
|
||||
const auto calculate_voice_state_size = [](const AudioCore::AudioRendererParameter& params) {
|
||||
const u64 voice_state_size = 0x100;
|
||||
const u64 additional_size = buffer_alignment_size - 1;
|
||||
return Common::AlignUp(voice_state_size * params.voice_count + additional_size,
|
||||
info_field_alignment_size);
|
||||
};
|
||||
|
||||
// Calculates the part of the size related to performance statistics.
|
||||
const auto calculate_perf_size = [this](const AudioCore::AudioRendererParameter& params) {
|
||||
// Extra size value appended to the end of the calculation.
|
||||
constexpr u64 appended = 128;
|
||||
|
||||
// Whether or not we assume the newer version of performance metrics data structures.
|
||||
const bool is_v2 =
|
||||
IsFeatureSupported(AudioFeatures::PerformanceMetricsVersion2, params.revision);
|
||||
|
||||
// Data structure sizes
|
||||
constexpr u64 perf_statistics_size = 0x0C;
|
||||
const u64 header_size = is_v2 ? 0x30 : 0x18;
|
||||
const u64 entry_size = is_v2 ? 0x18 : 0x10;
|
||||
const u64 detail_size = is_v2 ? 0x18 : 0x10;
|
||||
|
||||
const u64 entry_count = CalculateNumPerformanceEntries(params);
|
||||
const u64 size_per_frame =
|
||||
header_size + (entry_size * entry_count) + (detail_size * max_perf_detail_entries);
|
||||
|
||||
u64 size = 0;
|
||||
size += Common::AlignUp(size_per_frame * params.performance_frame_count + 1,
|
||||
buffer_alignment_size);
|
||||
size += Common::AlignUp(perf_statistics_size, buffer_alignment_size);
|
||||
size += appended;
|
||||
return size;
|
||||
};
|
||||
|
||||
// Calculates the part of the size that relates to the audio command buffer.
|
||||
const auto calculate_command_buffer_size =
|
||||
[this](const AudioCore::AudioRendererParameter& params) {
|
||||
constexpr u64 alignment = (buffer_alignment_size - 1) * 2;
|
||||
|
||||
if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) {
|
||||
constexpr u64 command_buffer_size = 0x18000;
|
||||
|
||||
return command_buffer_size + alignment;
|
||||
}
|
||||
|
||||
// When the variadic command buffer is supported, this means
|
||||
// the command generator for the audio renderer can issue commands
|
||||
// that are (as one would expect), variable in size. So what we need to do
|
||||
// is determine the maximum possible size for a few command data structures
|
||||
// then multiply them by the amount of present commands indicated by the given
|
||||
// respective audio parameters.
|
||||
|
||||
constexpr u64 max_biquad_filters = 2;
|
||||
constexpr u64 max_mix_buffers = 24;
|
||||
|
||||
constexpr u64 biquad_filter_command_size = 0x2C;
|
||||
|
||||
constexpr u64 depop_mix_command_size = 0x24;
|
||||
constexpr u64 depop_setup_command_size = 0x50;
|
||||
|
||||
constexpr u64 effect_command_max_size = 0x540;
|
||||
|
||||
constexpr u64 mix_command_size = 0x1C;
|
||||
constexpr u64 mix_ramp_command_size = 0x24;
|
||||
constexpr u64 mix_ramp_grouped_command_size = 0x13C;
|
||||
|
||||
constexpr u64 perf_command_size = 0x28;
|
||||
|
||||
constexpr u64 sink_command_size = 0x130;
|
||||
|
||||
constexpr u64 submix_command_max_size =
|
||||
depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers;
|
||||
|
||||
constexpr u64 volume_command_size = 0x1C;
|
||||
constexpr u64 volume_ramp_command_size = 0x20;
|
||||
|
||||
constexpr u64 voice_biquad_filter_command_size =
|
||||
biquad_filter_command_size * max_biquad_filters;
|
||||
constexpr u64 voice_data_command_size = 0x9C;
|
||||
const u64 voice_command_max_size =
|
||||
(params.splitter_count * depop_setup_command_size) +
|
||||
(voice_data_command_size + voice_biquad_filter_command_size +
|
||||
volume_ramp_command_size + mix_ramp_grouped_command_size);
|
||||
|
||||
// Now calculate the individual elements that comprise the size and add them together.
|
||||
const u64 effect_commands_size = params.effect_count * effect_command_max_size;
|
||||
|
||||
const u64 final_mix_commands_size =
|
||||
depop_mix_command_size + volume_command_size * max_mix_buffers;
|
||||
|
||||
const u64 perf_commands_size =
|
||||
perf_command_size *
|
||||
(CalculateNumPerformanceEntries(params) + max_perf_detail_entries);
|
||||
|
||||
const u64 sink_commands_size = params.sink_count * sink_command_size;
|
||||
|
||||
const u64 splitter_commands_size =
|
||||
params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size;
|
||||
|
||||
const u64 submix_commands_size = params.submix_count * submix_command_max_size;
|
||||
|
||||
const u64 voice_commands_size = params.voice_count * voice_command_max_size;
|
||||
|
||||
return effect_commands_size + final_mix_commands_size + perf_commands_size +
|
||||
sink_commands_size + splitter_commands_size + submix_commands_size +
|
||||
voice_commands_size + alignment;
|
||||
};
|
||||
|
||||
IPC::RequestParser rp{ctx};
|
||||
const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
|
||||
|
||||
u64 size = 0;
|
||||
size += calculate_mix_buffer_sizes(params);
|
||||
size += calculate_mix_info_size(params);
|
||||
size += calculate_voice_info_size(params);
|
||||
size += upsampler_manager_size;
|
||||
size += calculate_memory_pools_size(params);
|
||||
size += calculate_splitter_context_size(params);
|
||||
|
||||
size = Common::AlignUp(size, buffer_alignment_size);
|
||||
|
||||
size += calculate_upsampler_info_size(params);
|
||||
size += calculate_effect_info_size(params);
|
||||
size += calculate_sink_info_size(params);
|
||||
size += calculate_voice_state_size(params);
|
||||
size += calculate_perf_size(params);
|
||||
size += calculate_command_buffer_size(params);
|
||||
|
||||
// finally, 4KB page align the size, and we're done.
|
||||
size = Common::AlignUp(size, 4096);
|
||||
if (params.performance_frame_count >= 1) {
|
||||
output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
|
||||
16 * params.voice_count + 16) +
|
||||
0x658) *
|
||||
(params.performance_frame_count + 1) +
|
||||
0xc0,
|
||||
0x40) +
|
||||
output_sz;
|
||||
}
|
||||
output_sz = Common::AlignUp(output_sz + 0x1807e, 0x1000);
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 4};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.Push<u64>(size);
|
||||
|
||||
LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", size);
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.Push<u64>(output_sz);
|
||||
|
||||
LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz);
|
||||
}
|
||||
|
||||
void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
|
||||
@@ -598,15 +357,10 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
|
||||
}
|
||||
|
||||
bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
|
||||
// Byte swap
|
||||
const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0');
|
||||
|
||||
u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
|
||||
switch (feature) {
|
||||
case AudioFeatures::Splitter:
|
||||
return version_num >= 2U;
|
||||
case AudioFeatures::PerformanceMetricsVersion2:
|
||||
case AudioFeatures::VariadicCommandBuffer:
|
||||
return version_num >= 5U;
|
||||
return version_num >= 2u;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -28,8 +28,6 @@ private:
|
||||
|
||||
enum class AudioFeatures : u32 {
|
||||
Splitter,
|
||||
PerformanceMetricsVersion2,
|
||||
VariadicCommandBuffer,
|
||||
};
|
||||
|
||||
bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const;
|
||||
|
||||
@@ -34,8 +34,8 @@ public:
|
||||
RegisterHandlers(functions);
|
||||
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
register_event = Kernel::WritableEvent::CreateEventPair(
|
||||
kernel, Kernel::ResetType::Automatic, "BT:RegisterEvent");
|
||||
register_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
|
||||
"BT:RegisterEvent");
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@@ -57,13 +57,13 @@ public:
|
||||
RegisterHandlers(functions);
|
||||
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
scan_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
|
||||
scan_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
|
||||
"IBtmUserCore:ScanEvent");
|
||||
connection_event = Kernel::WritableEvent::CreateEventPair(
|
||||
kernel, Kernel::ResetType::Automatic, "IBtmUserCore:ConnectionEvent");
|
||||
kernel, Kernel::ResetType::OneShot, "IBtmUserCore:ConnectionEvent");
|
||||
service_discovery = Kernel::WritableEvent::CreateEventPair(
|
||||
kernel, Kernel::ResetType::Automatic, "IBtmUserCore:Discovery");
|
||||
config_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
|
||||
kernel, Kernel::ResetType::OneShot, "IBtmUserCore:Discovery");
|
||||
config_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
|
||||
"IBtmUserCore:ConfigEvent");
|
||||
}
|
||||
|
||||
|
||||
@@ -170,7 +170,7 @@ void Controller_NPad::InitNewlyAddedControler(std::size_t controller_idx) {
|
||||
void Controller_NPad::OnInit() {
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
styleset_changed_event = Kernel::WritableEvent::CreateEventPair(
|
||||
kernel, Kernel::ResetType::Automatic, "npad:NpadStyleSetChanged");
|
||||
kernel, Kernel::ResetType::OneShot, "npad:NpadStyleSetChanged");
|
||||
|
||||
if (!IsControllerActivated()) {
|
||||
return;
|
||||
|
||||
@@ -26,7 +26,7 @@ constexpr ResultCode ERR_NO_APPLICATION_AREA(ErrorModule::NFP, 152);
|
||||
Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
|
||||
: ServiceFramework(name), module(std::move(module)) {
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
|
||||
nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
|
||||
"IUser:NFCTagDetected");
|
||||
}
|
||||
|
||||
@@ -67,9 +67,9 @@ public:
|
||||
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
deactivate_event = Kernel::WritableEvent::CreateEventPair(
|
||||
kernel, Kernel::ResetType::Automatic, "IUser:DeactivateEvent");
|
||||
kernel, Kernel::ResetType::OneShot, "IUser:DeactivateEvent");
|
||||
availability_change_event = Kernel::WritableEvent::CreateEventPair(
|
||||
kernel, Kernel::ResetType::Automatic, "IUser:AvailabilityChangeEvent");
|
||||
kernel, Kernel::ResetType::OneShot, "IUser:AvailabilityChangeEvent");
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@@ -62,9 +62,9 @@ public:
|
||||
RegisterHandlers(functions);
|
||||
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
event1 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
|
||||
event1 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
|
||||
"IRequest:Event1");
|
||||
event2 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
|
||||
event2 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
|
||||
"IRequest:Event2");
|
||||
}
|
||||
|
||||
|
||||
@@ -141,7 +141,7 @@ public:
|
||||
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
finished_event = Kernel::WritableEvent::CreateEventPair(
|
||||
kernel, Kernel::ResetType::Automatic,
|
||||
kernel, Kernel::ResetType::OneShot,
|
||||
"IEnsureNetworkClockAvailabilityService:FinishEvent");
|
||||
}
|
||||
|
||||
|
||||
@@ -129,7 +129,7 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
|
||||
RegisterHandlers(functions);
|
||||
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
|
||||
query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
|
||||
"NVDRV::query_event");
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ namespace Service::NVFlinger {
|
||||
|
||||
BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) {
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
|
||||
buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
|
||||
"BufferQueue NativeHandle");
|
||||
}
|
||||
|
||||
|
||||
@@ -2,15 +2,16 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include "common/logging/log.h"
|
||||
#include "core/hle/ipc_helpers.h"
|
||||
#include "core/hle/kernel/client_port.h"
|
||||
#include "core/hle/kernel/client_session.h"
|
||||
#include "core/hle/service/set/set.h"
|
||||
#include "core/settings.h"
|
||||
|
||||
namespace Service::Set {
|
||||
namespace {
|
||||
|
||||
constexpr std::array<LanguageCode, 17> available_language_codes = {{
|
||||
LanguageCode::JA,
|
||||
LanguageCode::EN_US,
|
||||
@@ -31,35 +32,41 @@ constexpr std::array<LanguageCode, 17> available_language_codes = {{
|
||||
LanguageCode::ZH_HANT,
|
||||
}};
|
||||
|
||||
constexpr std::size_t pre4_0_0_max_entries = 15;
|
||||
constexpr std::size_t post4_0_0_max_entries = 17;
|
||||
constexpr std::size_t pre4_0_0_max_entries = 0xF;
|
||||
constexpr std::size_t post4_0_0_max_entries = 0x40;
|
||||
|
||||
constexpr ResultCode ERR_INVALID_LANGUAGE{ErrorModule::Settings, 625};
|
||||
|
||||
void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t num_language_codes) {
|
||||
IPC::ResponseBuilder rb{ctx, 3};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.Push(static_cast<u32>(num_language_codes));
|
||||
}
|
||||
|
||||
void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t max_size) {
|
||||
const std::size_t requested_amount = ctx.GetWriteBufferSize() / sizeof(LanguageCode);
|
||||
const std::size_t copy_amount = std::min(requested_amount, max_size);
|
||||
const std::size_t copy_size = copy_amount * sizeof(LanguageCode);
|
||||
|
||||
ctx.WriteBuffer(available_language_codes.data(), copy_size);
|
||||
PushResponseLanguageCode(ctx, copy_amount);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
LanguageCode GetLanguageCodeFromIndex(std::size_t index) {
|
||||
return available_language_codes.at(index);
|
||||
}
|
||||
|
||||
template <std::size_t size>
|
||||
static std::array<LanguageCode, size> MakeLanguageCodeSubset() {
|
||||
std::array<LanguageCode, size> arr;
|
||||
std::copy_n(available_language_codes.begin(), size, arr.begin());
|
||||
return arr;
|
||||
}
|
||||
|
||||
static void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t max_size) {
|
||||
IPC::ResponseBuilder rb{ctx, 3};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
if (available_language_codes.size() > max_size) {
|
||||
rb.Push(static_cast<u32>(max_size));
|
||||
} else {
|
||||
rb.Push(static_cast<u32>(available_language_codes.size()));
|
||||
}
|
||||
}
|
||||
|
||||
void SET::GetAvailableLanguageCodes(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Service_SET, "called");
|
||||
|
||||
GetAvailableLanguageCodesImpl(ctx, pre4_0_0_max_entries);
|
||||
if (available_language_codes.size() > pre4_0_0_max_entries) {
|
||||
ctx.WriteBuffer(MakeLanguageCodeSubset<pre4_0_0_max_entries>());
|
||||
} else {
|
||||
ctx.WriteBuffer(available_language_codes);
|
||||
}
|
||||
PushResponseLanguageCode(ctx, pre4_0_0_max_entries);
|
||||
}
|
||||
|
||||
void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
|
||||
@@ -80,7 +87,12 @@ void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
|
||||
void SET::GetAvailableLanguageCodes2(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Service_SET, "called");
|
||||
|
||||
GetAvailableLanguageCodesImpl(ctx, post4_0_0_max_entries);
|
||||
if (available_language_codes.size() > post4_0_0_max_entries) {
|
||||
ctx.WriteBuffer(MakeLanguageCodeSubset<post4_0_0_max_entries>());
|
||||
} else {
|
||||
ctx.WriteBuffer(available_language_codes);
|
||||
}
|
||||
PushResponseLanguageCode(ctx, post4_0_0_max_entries);
|
||||
}
|
||||
|
||||
void SET::GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx) {
|
||||
@@ -90,9 +102,9 @@ void SET::GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx) {
|
||||
}
|
||||
|
||||
void SET::GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Service_SET, "called");
|
||||
|
||||
PushResponseLanguageCode(ctx, post4_0_0_max_entries);
|
||||
|
||||
LOG_DEBUG(Service_SET, "called");
|
||||
}
|
||||
|
||||
void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) {
|
||||
|
||||
@@ -17,7 +17,7 @@ namespace Service::VI {
|
||||
|
||||
Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
|
||||
auto& kernel = Core::System::GetInstance().Kernel();
|
||||
vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
|
||||
vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
|
||||
fmt::format("Display VSync Event {}", id));
|
||||
}
|
||||
|
||||
|
||||
@@ -556,7 +556,7 @@ private:
|
||||
} else {
|
||||
// Wait the current thread until a buffer becomes available
|
||||
ctx.SleepClientThread(
|
||||
"IHOSBinderDriver::DequeueBuffer", -1,
|
||||
Kernel::GetCurrentThread(), "IHOSBinderDriver::DequeueBuffer", -1,
|
||||
[=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
|
||||
Kernel::ThreadWakeupReason reason) {
|
||||
// Repeat TransactParcel DequeueBuffer when a buffer is available
|
||||
|
||||
@@ -89,6 +89,7 @@ add_library(video_core STATIC
|
||||
shader/decode/conversion.cpp
|
||||
shader/decode/memory.cpp
|
||||
shader/decode/texture.cpp
|
||||
shader/decode/image.cpp
|
||||
shader/decode/float_set_predicate.cpp
|
||||
shader/decode/integer_set_predicate.cpp
|
||||
shader/decode/half_set_predicate.cpp
|
||||
|
||||
@@ -40,13 +40,6 @@ bool DmaPusher::Step() {
|
||||
}
|
||||
|
||||
const CommandList& command_list{dma_pushbuffer.front()};
|
||||
ASSERT_OR_EXECUTE(!command_list.empty(), {
|
||||
// Somehow the command_list is empty, in order to avoid a crash
|
||||
// We ignore it and assume its size is 0.
|
||||
dma_pushbuffer.pop();
|
||||
dma_pushbuffer_subindex = 0;
|
||||
return true;
|
||||
});
|
||||
const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
|
||||
GPUVAddr dma_get = command_list_header.addr;
|
||||
GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
@@ -12,9 +10,7 @@
|
||||
namespace Tegra::Engines::Upload {
|
||||
|
||||
State::State(MemoryManager& memory_manager, Registers& regs)
|
||||
: regs{regs}, memory_manager{memory_manager} {}
|
||||
|
||||
State::~State() = default;
|
||||
: memory_manager(memory_manager), regs(regs) {}
|
||||
|
||||
void State::ProcessExec(const bool is_linear) {
|
||||
write_offset = 0;
|
||||
|
||||
@@ -4,8 +4,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra {
|
||||
@@ -55,10 +57,10 @@ struct Registers {
|
||||
class State {
|
||||
public:
|
||||
State(MemoryManager& memory_manager, Registers& regs);
|
||||
~State();
|
||||
~State() = default;
|
||||
|
||||
void ProcessExec(bool is_linear);
|
||||
void ProcessData(u32 data, bool is_last_call);
|
||||
void ProcessExec(const bool is_linear);
|
||||
void ProcessData(const u32 data, const bool is_last_call);
|
||||
|
||||
private:
|
||||
u32 write_offset = 0;
|
||||
|
||||
@@ -34,9 +34,9 @@ void Maxwell3D::InitializeRegisterDefaults() {
|
||||
|
||||
// Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is
|
||||
// needed for ARMS.
|
||||
for (auto& viewport : regs.viewports) {
|
||||
viewport.depth_range_near = 0.0f;
|
||||
viewport.depth_range_far = 1.0f;
|
||||
for (std::size_t viewport{}; viewport < Regs::NumViewports; ++viewport) {
|
||||
regs.viewports[viewport].depth_range_near = 0.0f;
|
||||
regs.viewports[viewport].depth_range_far = 1.0f;
|
||||
}
|
||||
|
||||
// Doom and Bomberman seems to use the uninitialized registers and just enable blend
|
||||
@@ -47,13 +47,13 @@ void Maxwell3D::InitializeRegisterDefaults() {
|
||||
regs.blend.equation_a = Regs::Blend::Equation::Add;
|
||||
regs.blend.factor_source_a = Regs::Blend::Factor::One;
|
||||
regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
|
||||
for (auto& blend : regs.independent_blend) {
|
||||
blend.equation_rgb = Regs::Blend::Equation::Add;
|
||||
blend.factor_source_rgb = Regs::Blend::Factor::One;
|
||||
blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
|
||||
blend.equation_a = Regs::Blend::Equation::Add;
|
||||
blend.factor_source_a = Regs::Blend::Factor::One;
|
||||
blend.factor_dest_a = Regs::Blend::Factor::Zero;
|
||||
for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) {
|
||||
regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add;
|
||||
regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One;
|
||||
regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero;
|
||||
regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add;
|
||||
regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One;
|
||||
regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero;
|
||||
}
|
||||
regs.stencil_front_op_fail = Regs::StencilOp::Keep;
|
||||
regs.stencil_front_op_zfail = Regs::StencilOp::Keep;
|
||||
@@ -75,11 +75,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
|
||||
|
||||
// TODO(bunnei): Some games do not initialize the color masks (e.g. Sonic Mania). Assuming a
|
||||
// default of enabled fixes rendering here.
|
||||
for (auto& color_mask : regs.color_mask) {
|
||||
color_mask.R.Assign(1);
|
||||
color_mask.G.Assign(1);
|
||||
color_mask.B.Assign(1);
|
||||
color_mask.A.Assign(1);
|
||||
for (std::size_t color_mask = 0; color_mask < Regs::NumRenderTargets; color_mask++) {
|
||||
regs.color_mask[color_mask].R.Assign(1);
|
||||
regs.color_mask[color_mask].G.Assign(1);
|
||||
regs.color_mask[color_mask].B.Assign(1);
|
||||
regs.color_mask[color_mask].A.Assign(1);
|
||||
}
|
||||
|
||||
// Commercial games seem to assume this value is enabled and nouveau sets this value manually.
|
||||
@@ -178,13 +178,13 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
|
||||
|
||||
// Vertex buffer
|
||||
if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
|
||||
method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
|
||||
method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
|
||||
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
|
||||
} else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
|
||||
method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
|
||||
method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
|
||||
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
|
||||
} else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
|
||||
method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
|
||||
method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
|
||||
dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
|
||||
}
|
||||
}
|
||||
@@ -432,17 +432,13 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
|
||||
Texture::TICEntry tic_entry;
|
||||
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
|
||||
|
||||
ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
|
||||
tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
|
||||
"TIC versions other than BlockLinear or Pitch are unimplemented");
|
||||
|
||||
const auto r_type = tic_entry.r_type.Value();
|
||||
const auto g_type = tic_entry.g_type.Value();
|
||||
const auto b_type = tic_entry.b_type.Value();
|
||||
const auto a_type = tic_entry.a_type.Value();
|
||||
const auto r_type{tic_entry.r_type.Value()};
|
||||
const auto g_type{tic_entry.g_type.Value()};
|
||||
const auto b_type{tic_entry.b_type.Value()};
|
||||
const auto a_type{tic_entry.a_type.Value()};
|
||||
|
||||
// TODO(Subv): Different data types for separate components are not supported
|
||||
DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
|
||||
ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
|
||||
|
||||
return tic_entry;
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
@@ -59,7 +58,6 @@ public:
|
||||
static constexpr std::size_t NumCBData = 16;
|
||||
static constexpr std::size_t NumVertexArrays = 32;
|
||||
static constexpr std::size_t NumVertexAttributes = 32;
|
||||
static constexpr std::size_t NumVaryings = 31;
|
||||
static constexpr std::size_t NumTextureSamplers = 32;
|
||||
static constexpr std::size_t NumClipDistances = 8;
|
||||
static constexpr std::size_t MaxShaderProgram = 6;
|
||||
@@ -1109,7 +1107,6 @@ public:
|
||||
} regs{};
|
||||
|
||||
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
|
||||
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
|
||||
|
||||
struct State {
|
||||
struct ConstBufferInfo {
|
||||
|
||||
@@ -98,10 +98,6 @@ union Attribute {
|
||||
BitField<22, 2, u64> element;
|
||||
BitField<24, 6, Index> index;
|
||||
BitField<47, 3, AttributeSize> size;
|
||||
|
||||
bool IsPhysical() const {
|
||||
return element == 0 && static_cast<u64>(index.Value()) == 0;
|
||||
}
|
||||
} fmt20;
|
||||
|
||||
union {
|
||||
@@ -126,6 +122,15 @@ union Sampler {
|
||||
u64 value{};
|
||||
};
|
||||
|
||||
union Image {
|
||||
Image() = default;
|
||||
|
||||
constexpr explicit Image(u64 value) : value{value} {}
|
||||
|
||||
BitField<36, 13, u64> index;
|
||||
u64 value;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Shader
|
||||
|
||||
namespace std {
|
||||
@@ -344,6 +349,26 @@ enum class TextureMiscMode : u64 {
|
||||
PTP,
|
||||
};
|
||||
|
||||
enum class SurfaceDataMode : u64 {
|
||||
P = 0,
|
||||
D_BA = 1,
|
||||
};
|
||||
|
||||
enum class OutOfBoundsStore : u64 {
|
||||
Ignore = 0,
|
||||
Clamp = 1,
|
||||
Trap = 2,
|
||||
};
|
||||
|
||||
enum class ImageType : u64 {
|
||||
Texture1D = 0,
|
||||
TextureBuffer = 1,
|
||||
Texture1DArray = 2,
|
||||
Texture2D = 3,
|
||||
Texture2DArray = 4,
|
||||
Texture3D = 5,
|
||||
};
|
||||
|
||||
enum class IsberdMode : u64 {
|
||||
None = 0,
|
||||
Patch = 1,
|
||||
@@ -398,7 +423,7 @@ enum class LmemLoadCacheManagement : u64 {
|
||||
CV = 3,
|
||||
};
|
||||
|
||||
enum class LmemStoreCacheManagement : u64 {
|
||||
enum class StoreCacheManagement : u64 {
|
||||
Default = 0,
|
||||
CG = 1,
|
||||
CS = 2,
|
||||
@@ -503,11 +528,6 @@ enum class SystemVariable : u64 {
|
||||
CircularQueueEntryAddressHigh = 0x63,
|
||||
};
|
||||
|
||||
enum class PhysicalAttributeDirection : u64 {
|
||||
Input = 0,
|
||||
Output = 1,
|
||||
};
|
||||
|
||||
union Instruction {
|
||||
Instruction& operator=(const Instruction& instr) {
|
||||
value = instr.value;
|
||||
@@ -529,11 +549,6 @@ union Instruction {
|
||||
BitField<39, 8, Register> gpr39;
|
||||
BitField<48, 16, u64> opcode;
|
||||
|
||||
union {
|
||||
BitField<8, 8, Register> gpr;
|
||||
BitField<20, 24, s64> offset;
|
||||
} gmem;
|
||||
|
||||
union {
|
||||
BitField<20, 16, u64> imm20_16;
|
||||
BitField<20, 19, u64> imm20_19;
|
||||
@@ -601,7 +616,6 @@ union Instruction {
|
||||
} alu;
|
||||
|
||||
union {
|
||||
BitField<38, 1, u64> idx;
|
||||
BitField<51, 1, u64> saturate;
|
||||
BitField<52, 2, IpaSampleMode> sample_mode;
|
||||
BitField<54, 2, IpaInterpMode> interp_mode;
|
||||
@@ -811,30 +825,21 @@ union Instruction {
|
||||
} ld_l;
|
||||
|
||||
union {
|
||||
BitField<44, 2, LmemStoreCacheManagement> cache_management;
|
||||
BitField<44, 2, StoreCacheManagement> cache_management;
|
||||
} st_l;
|
||||
|
||||
union {
|
||||
BitField<48, 3, UniformType> type;
|
||||
BitField<46, 2, u64> cache_mode;
|
||||
BitField<20, 24, s64> immediate_offset;
|
||||
} ldg;
|
||||
|
||||
union {
|
||||
BitField<48, 3, UniformType> type;
|
||||
BitField<46, 2, u64> cache_mode;
|
||||
BitField<20, 24, s64> immediate_offset;
|
||||
} stg;
|
||||
|
||||
union {
|
||||
BitField<32, 1, PhysicalAttributeDirection> direction;
|
||||
BitField<47, 3, AttributeSize> size;
|
||||
BitField<20, 11, u64> address;
|
||||
} al2p;
|
||||
|
||||
union {
|
||||
BitField<53, 3, UniformType> type;
|
||||
BitField<52, 1, u64> extended;
|
||||
} generic;
|
||||
|
||||
union {
|
||||
BitField<0, 3, u64> pred0;
|
||||
BitField<3, 3, u64> pred3;
|
||||
@@ -1231,6 +1236,20 @@ union Instruction {
|
||||
}
|
||||
} texs;
|
||||
|
||||
union {
|
||||
BitField<28, 1, u64> is_array;
|
||||
BitField<29, 2, TextureType> texture_type;
|
||||
BitField<35, 1, u64> aoffi;
|
||||
BitField<49, 1, u64> nodep_flag;
|
||||
BitField<50, 1, u64> ms; // Multisample?
|
||||
BitField<54, 1, u64> cl;
|
||||
BitField<55, 1, u64> process_mode;
|
||||
|
||||
TextureProcessMode GetTextureProcessMode() const {
|
||||
return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
|
||||
}
|
||||
} tld;
|
||||
|
||||
union {
|
||||
BitField<49, 1, u64> nodep_flag;
|
||||
BitField<53, 4, u64> texture_info;
|
||||
@@ -1280,6 +1299,35 @@ union Instruction {
|
||||
}
|
||||
} tlds;
|
||||
|
||||
union {
|
||||
BitField<24, 2, StoreCacheManagement> cache_management;
|
||||
BitField<33, 3, ImageType> image_type;
|
||||
BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
|
||||
BitField<51, 1, u64> is_immediate;
|
||||
BitField<52, 1, SurfaceDataMode> mode;
|
||||
|
||||
BitField<20, 3, StoreType> store_data_layout;
|
||||
BitField<20, 4, u64> component_mask_selector;
|
||||
|
||||
bool IsComponentEnabled(std::size_t component) const {
|
||||
ASSERT(mode == SurfaceDataMode::P);
|
||||
constexpr u8 R = 0b0001;
|
||||
constexpr u8 G = 0b0010;
|
||||
constexpr u8 B = 0b0100;
|
||||
constexpr u8 A = 0b1000;
|
||||
constexpr std::array<u8, 16> mask = {
|
||||
0, (R), (G), (R | G), (B), (R | B),
|
||||
(G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A),
|
||||
(B | A), (R | B | A), (G | B | A), (R | G | B | A)};
|
||||
return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
|
||||
}
|
||||
|
||||
StoreType GetStoreDataLayout() const {
|
||||
ASSERT(mode == SurfaceDataMode::D_BA);
|
||||
return store_data_layout;
|
||||
}
|
||||
} sust;
|
||||
|
||||
union {
|
||||
BitField<20, 24, u64> target;
|
||||
BitField<5, 1, u64> constant_buffer;
|
||||
@@ -1371,6 +1419,7 @@ union Instruction {
|
||||
|
||||
Attribute attribute;
|
||||
Sampler sampler;
|
||||
Image image;
|
||||
|
||||
u64 value;
|
||||
};
|
||||
@@ -1395,24 +1444,23 @@ public:
|
||||
LD_L,
|
||||
LD_S,
|
||||
LD_C,
|
||||
LD, // Load from generic memory
|
||||
LDG, // Load from global memory
|
||||
ST_A,
|
||||
ST_L,
|
||||
ST_S,
|
||||
ST, // Store in generic memory
|
||||
STG, // Store in global memory
|
||||
AL2P, // Transforms attribute memory into physical memory
|
||||
LDG, // Load from global memory
|
||||
STG, // Store in global memory
|
||||
TEX,
|
||||
TEX_B, // Texture Load Bindless
|
||||
TXQ, // Texture Query
|
||||
TXQ_B, // Texture Query Bindless
|
||||
TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
|
||||
TLD, // Texture Load
|
||||
TLDS, // Texture Load with scalar/non-vec4 source/destinations
|
||||
TLD4, // Texture Load 4
|
||||
TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
|
||||
TMML_B, // Texture Mip Map Level
|
||||
TMML, // Texture Mip Map Level
|
||||
SUST, // Surface Store
|
||||
EXIT,
|
||||
IPA,
|
||||
OUT_R, // Emit vertex/primitive
|
||||
@@ -1543,6 +1591,7 @@ public:
|
||||
Synch,
|
||||
Memory,
|
||||
Texture,
|
||||
Image,
|
||||
FloatSet,
|
||||
FloatSetPredicate,
|
||||
IntegerSet,
|
||||
@@ -1668,24 +1717,23 @@ private:
|
||||
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
|
||||
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
|
||||
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
|
||||
INST("100-------------", Id::LD, Type::Memory, "LD"),
|
||||
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
|
||||
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
|
||||
INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
|
||||
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
||||
INST("101-------------", Id::ST, Type::Memory, "ST"),
|
||||
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
|
||||
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
||||
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
|
||||
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
|
||||
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
|
||||
INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
|
||||
INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
|
||||
INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
|
||||
INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
|
||||
INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
|
||||
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
|
||||
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
|
||||
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
|
||||
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
|
||||
INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
|
||||
INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
|
||||
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
|
||||
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
|
||||
|
||||
@@ -118,7 +118,7 @@ void SynchState::WaitForSynchronization(u64 fence) {
|
||||
// Wait for the GPU to be idle (all commands to be executed)
|
||||
{
|
||||
MICROPROFILE_SCOPE(GPU_wait);
|
||||
std::unique_lock lock{synchronization_mutex};
|
||||
std::unique_lock<std::mutex> lock{synchronization_mutex};
|
||||
synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,6 +81,12 @@ struct CommandDataContainer {
|
||||
CommandDataContainer(CommandData&& data, u64 next_fence)
|
||||
: data{std::move(data)}, fence{next_fence} {}
|
||||
|
||||
CommandDataContainer& operator=(const CommandDataContainer& t) {
|
||||
data = std::move(t.data);
|
||||
fence = t.fence;
|
||||
return *this;
|
||||
}
|
||||
|
||||
CommandData data;
|
||||
u64 fence{};
|
||||
};
|
||||
@@ -103,7 +109,7 @@ struct SynchState final {
|
||||
|
||||
void TrySynchronize() {
|
||||
if (IsSynchronized()) {
|
||||
std::lock_guard lock{synchronization_mutex};
|
||||
std::lock_guard<std::mutex> lock{synchronization_mutex};
|
||||
synchronization_condition.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -118,12 +118,10 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||
static_cast<u32>(opcode.operation.Value()));
|
||||
}
|
||||
|
||||
// An instruction with the Exit flag will not actually
|
||||
// cause an exit if it's executed inside a delay slot.
|
||||
// TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further
|
||||
// testing on the MME code.
|
||||
if (opcode.is_exit) {
|
||||
// Exit has a delay slot, execute the next instruction
|
||||
// Note: Executing an exit during a branch delay slot will cause the instruction at the
|
||||
// branch target to be executed before exiting.
|
||||
Step(offset, true);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -144,9 +144,8 @@ protected:
|
||||
|
||||
object->SetIsRegistered(false);
|
||||
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
|
||||
const CacheAddr addr = object->GetCacheAddr();
|
||||
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
|
||||
map_cache.erase(addr);
|
||||
map_cache.erase(object->GetCacheAddr());
|
||||
}
|
||||
|
||||
/// Returns a ticks counter used for tracking when cached objects were last modified
|
||||
|
||||
@@ -21,15 +21,11 @@ T GetInteger(GLenum pname) {
|
||||
|
||||
Device::Device() {
|
||||
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
||||
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
|
||||
has_variable_aoffi = TestVariableAoffi();
|
||||
}
|
||||
|
||||
Device::Device(std::nullptr_t) {
|
||||
uniform_buffer_alignment = 0;
|
||||
max_vertex_attributes = 16;
|
||||
max_varyings = 15;
|
||||
has_variable_aoffi = true;
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
@@ -18,14 +17,6 @@ public:
|
||||
return uniform_buffer_alignment;
|
||||
}
|
||||
|
||||
u32 GetMaxVertexAttributes() const {
|
||||
return max_vertex_attributes;
|
||||
}
|
||||
|
||||
u32 GetMaxVaryings() const {
|
||||
return max_varyings;
|
||||
}
|
||||
|
||||
bool HasVariableAoffi() const {
|
||||
return has_variable_aoffi;
|
||||
}
|
||||
@@ -34,8 +25,6 @@ private:
|
||||
static bool TestVariableAoffi();
|
||||
|
||||
std::size_t uniform_buffer_alignment{};
|
||||
u32 max_vertex_attributes{};
|
||||
u32 max_varyings{};
|
||||
bool has_variable_aoffi{};
|
||||
};
|
||||
|
||||
|
||||
@@ -29,8 +29,10 @@
|
||||
namespace OpenGL {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using PixelFormat = VideoCore::Surface::PixelFormat;
|
||||
using SurfaceType = VideoCore::Surface::SurfaceType;
|
||||
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
using VideoCore::Surface::SurfaceTarget;
|
||||
using VideoCore::Surface::SurfaceType;
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
|
||||
@@ -119,11 +121,6 @@ void RasterizerOpenGL::CheckExtensions() {
|
||||
Render_OpenGL,
|
||||
"Anisotropic filter is not supported! This can cause graphical issues in some games.");
|
||||
}
|
||||
if (!GLAD_GL_ARB_buffer_storage) {
|
||||
LOG_WARNING(
|
||||
Render_OpenGL,
|
||||
"Buffer storage control is not supported! This can cause performance degradation.");
|
||||
}
|
||||
}
|
||||
|
||||
GLuint RasterizerOpenGL::SetupVertexFormat() {
|
||||
@@ -261,8 +258,8 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
|
||||
// MakeQuadArray always generates u32 indexes
|
||||
params.index_format = GL_UNSIGNED_INT;
|
||||
params.count = (regs.vertex_buffer.count / 4) * 6;
|
||||
params.index_buffer_offset = primitive_assembler.MakeQuadArray(
|
||||
regs.vertex_buffer.first, regs.vertex_buffer.count);
|
||||
params.index_buffer_offset =
|
||||
primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count);
|
||||
}
|
||||
return params;
|
||||
}
|
||||
@@ -323,8 +320,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
static_cast<GLsizeiptr>(sizeof(ubo)));
|
||||
|
||||
Shader shader{shader_cache.GetStageProgram(program)};
|
||||
const auto [program_handle, next_bindings] =
|
||||
shader->GetProgramHandle(primitive_mode, base_bindings);
|
||||
|
||||
const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)};
|
||||
SetupConstBuffers(stage_enum, shader, base_bindings);
|
||||
SetupGlobalRegions(stage_enum, shader, base_bindings);
|
||||
const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
|
||||
|
||||
const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
|
||||
const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
|
||||
|
||||
switch (program) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
@@ -342,11 +345,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
shader_config.enable.Value(), shader_config.offset);
|
||||
}
|
||||
|
||||
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
|
||||
SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
|
||||
SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
|
||||
SetupTextures(stage_enum, shader, program_handle, base_bindings);
|
||||
|
||||
// Workaround for Intel drivers.
|
||||
// When a clip distance is enabled but not set in the shader it crops parts of the screen
|
||||
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
|
||||
@@ -809,8 +807,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
|
||||
const Shader& shader, GLuint program_handle,
|
||||
BaseBindings base_bindings) {
|
||||
const Shader& shader, BaseBindings base_bindings) {
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& gpu = system.GPU();
|
||||
const auto& maxwell3d = gpu.Maxwell3D();
|
||||
@@ -857,8 +854,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
|
||||
const Shader& shader, GLenum primitive_mode,
|
||||
BaseBindings base_bindings) {
|
||||
const Shader& shader, BaseBindings base_bindings) {
|
||||
const auto& entries = shader->GetShaderEntries().global_memory_entries;
|
||||
for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
|
||||
const auto& entry{entries[bindpoint]};
|
||||
@@ -871,8 +867,8 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
|
||||
GLuint program_handle, BaseBindings base_bindings) {
|
||||
TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
|
||||
BaseBindings base_bindings) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||
const auto& gpu = system.GPU();
|
||||
const auto& maxwell3d = gpu.Maxwell3D();
|
||||
@@ -881,6 +877,8 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
|
||||
ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
|
||||
"Exceeded the number of active textures.");
|
||||
|
||||
TextureBufferUsage texture_buffer_usage{0};
|
||||
|
||||
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
|
||||
const auto& entry = entries[bindpoint];
|
||||
Tegra::Texture::FullTextureInfo texture;
|
||||
@@ -894,18 +892,25 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
|
||||
}
|
||||
const u32 current_bindpoint = base_bindings.sampler + bindpoint;
|
||||
|
||||
state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc);
|
||||
auto& unit{state.texture_units[current_bindpoint]};
|
||||
unit.sampler = sampler_cache.GetSampler(texture.tsc);
|
||||
|
||||
if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
|
||||
state.texture_units[current_bindpoint].texture =
|
||||
surface->Texture(entry.IsArray()).handle;
|
||||
if (surface->GetSurfaceParams().target == SurfaceTarget::TextureBuffer) {
|
||||
// Record that this texture is a texture buffer.
|
||||
texture_buffer_usage.set(bindpoint);
|
||||
}
|
||||
|
||||
unit.texture = surface->Texture(entry.IsArray()).handle;
|
||||
surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
|
||||
texture.tic.w_source);
|
||||
} else {
|
||||
// Can occur when texture addr is null or its memory is unmapped/invalid
|
||||
state.texture_units[current_bindpoint].texture = 0;
|
||||
unit.texture = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return texture_buffer_usage;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
|
||||
@@ -1135,9 +1140,7 @@ void RasterizerOpenGL::SyncTransformFeedback() {
|
||||
|
||||
void RasterizerOpenGL::SyncPointState() {
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
|
||||
// in OpenGL).
|
||||
state.point.size = std::max(1.0f, regs.point_size);
|
||||
state.point.size = regs.point_size;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncPolygonOffset() {
|
||||
|
||||
@@ -106,16 +106,16 @@ private:
|
||||
|
||||
/// Configures the current constbuffers to use for the draw command.
|
||||
void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
|
||||
GLuint program_handle, BaseBindings base_bindings);
|
||||
BaseBindings base_bindings);
|
||||
|
||||
/// Configures the current global memory entries to use for the draw command.
|
||||
void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
|
||||
const Shader& shader, GLenum primitive_mode,
|
||||
BaseBindings base_bindings);
|
||||
const Shader& shader, BaseBindings base_bindings);
|
||||
|
||||
/// Configures the current textures to use for the draw command.
|
||||
void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
|
||||
GLuint program_handle, BaseBindings base_bindings);
|
||||
/// Configures the current textures to use for the draw command. Returns shaders texture buffer
|
||||
/// usage.
|
||||
TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
|
||||
const Shader& shader, BaseBindings base_bindings);
|
||||
|
||||
/// Syncs the viewport and depth range to match the guest state
|
||||
void SyncViewport(OpenGLState& current_state);
|
||||
|
||||
@@ -140,7 +140,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
|
||||
|
||||
params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
|
||||
params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
|
||||
if (!params.is_tiled) {
|
||||
if (config.tic.IsLineal()) {
|
||||
params.pitch = config.tic.Pitch();
|
||||
}
|
||||
params.unaligned_height = config.tic.Height();
|
||||
@@ -149,6 +149,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
|
||||
|
||||
switch (params.target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
case SurfaceTarget::TextureBuffer:
|
||||
case SurfaceTarget::Texture2D:
|
||||
params.depth = 1;
|
||||
break;
|
||||
@@ -389,6 +390,8 @@ static GLenum SurfaceTargetToGL(SurfaceTarget target) {
|
||||
switch (target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
return GL_TEXTURE_1D;
|
||||
case SurfaceTarget::TextureBuffer:
|
||||
return GL_TEXTURE_BUFFER;
|
||||
case SurfaceTarget::Texture2D:
|
||||
return GL_TEXTURE_2D;
|
||||
case SurfaceTarget::Texture3D:
|
||||
@@ -600,29 +603,35 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
|
||||
|
||||
switch (params.target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
|
||||
width);
|
||||
glTextureStorage1D(texture.handle, params.max_mip_level, gl_internal_format, width);
|
||||
break;
|
||||
case SurfaceTarget::TextureBuffer:
|
||||
texture_buffer.Create();
|
||||
glNamedBufferStorage(texture_buffer.handle,
|
||||
params.width * GetBytesPerPixel(params.pixel_format), nullptr,
|
||||
GL_DYNAMIC_STORAGE_BIT);
|
||||
glTextureBuffer(texture.handle, gl_internal_format, texture_buffer.handle);
|
||||
break;
|
||||
case SurfaceTarget::Texture2D:
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
|
||||
width, height);
|
||||
glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height);
|
||||
break;
|
||||
case SurfaceTarget::Texture3D:
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
case SurfaceTarget::TextureCubeArray:
|
||||
glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
|
||||
width, height, params.depth);
|
||||
glTextureStorage3D(texture.handle, params.max_mip_level, gl_internal_format, width, height,
|
||||
params.depth);
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
|
||||
static_cast<u32>(params.target));
|
||||
UNREACHABLE();
|
||||
glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
|
||||
width, height);
|
||||
glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height);
|
||||
}
|
||||
|
||||
ApplyTextureDefaults(texture.handle, params.max_mip_level);
|
||||
if (params.target != SurfaceTarget::TextureBuffer) {
|
||||
ApplyTextureDefaults(texture.handle, params.max_mip_level);
|
||||
}
|
||||
|
||||
OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
|
||||
}
|
||||
@@ -785,6 +794,13 @@ void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_t
|
||||
glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
|
||||
tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
|
||||
break;
|
||||
case SurfaceTarget::TextureBuffer:
|
||||
ASSERT(mip_map == 0);
|
||||
glNamedBufferSubData(texture_buffer.handle, x0,
|
||||
static_cast<GLsizeiptr>(rect.GetWidth()) *
|
||||
GetBytesPerPixel(params.pixel_format),
|
||||
&gl_buffer[mip_map][buffer_offset]);
|
||||
break;
|
||||
case SurfaceTarget::Texture2D:
|
||||
glTextureSubImage2D(texture.handle, mip_map, x0, y0,
|
||||
static_cast<GLsizei>(rect.GetWidth()),
|
||||
@@ -860,6 +876,9 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
|
||||
Tegra::Texture::SwizzleSource swizzle_y,
|
||||
Tegra::Texture::SwizzleSource swizzle_z,
|
||||
Tegra::Texture::SwizzleSource swizzle_w) {
|
||||
if (params.target == SurfaceTarget::TextureBuffer) {
|
||||
return;
|
||||
}
|
||||
const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x);
|
||||
const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y);
|
||||
const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z);
|
||||
|
||||
@@ -250,6 +250,8 @@ struct SurfaceParams {
|
||||
switch (target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
return "1D";
|
||||
case SurfaceTarget::TextureBuffer:
|
||||
return "Buffer";
|
||||
case SurfaceTarget::Texture2D:
|
||||
return "2D";
|
||||
case SurfaceTarget::Texture3D:
|
||||
@@ -439,6 +441,7 @@ private:
|
||||
|
||||
OGLTexture texture;
|
||||
OGLTexture discrepant_view;
|
||||
OGLBuffer texture_buffer;
|
||||
SurfaceParams params{};
|
||||
GLenum gl_target{};
|
||||
GLenum gl_internal_format{};
|
||||
|
||||
@@ -164,8 +164,12 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr
|
||||
}
|
||||
|
||||
CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
|
||||
Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
|
||||
GLenum primitive_mode, bool hint_retrievable = false) {
|
||||
Maxwell::ShaderProgram program_type, const ProgramVariant& variant,
|
||||
bool hint_retrievable = false) {
|
||||
auto base_bindings{variant.base_bindings};
|
||||
const auto primitive_mode{variant.primitive_mode};
|
||||
const auto texture_buffer_usage{variant.texture_buffer_usage};
|
||||
|
||||
std::string source = "#version 430 core\n";
|
||||
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
|
||||
|
||||
@@ -181,6 +185,18 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
|
||||
source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
|
||||
base_bindings.sampler++);
|
||||
}
|
||||
for (const auto& image : entries.images) {
|
||||
source +=
|
||||
fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++);
|
||||
}
|
||||
|
||||
// Transform 1D textures to texture samplers by declaring its preprocessor macros.
|
||||
for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) {
|
||||
if (!texture_buffer_usage.test(i)) {
|
||||
continue;
|
||||
}
|
||||
source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i);
|
||||
}
|
||||
|
||||
if (program_type == Maxwell::ShaderProgram::Geometry) {
|
||||
const auto [glsl_topology, debug_name, max_vertices] =
|
||||
@@ -256,20 +272,18 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
|
||||
shader_length = entries.shader_length;
|
||||
}
|
||||
|
||||
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
|
||||
BaseBindings base_bindings) {
|
||||
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
|
||||
GLuint handle{};
|
||||
if (program_type == Maxwell::ShaderProgram::Geometry) {
|
||||
handle = GetGeometryShader(primitive_mode, base_bindings);
|
||||
handle = GetGeometryShader(variant);
|
||||
} else {
|
||||
const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
|
||||
const auto [entry, is_cache_miss] = programs.try_emplace(variant);
|
||||
auto& program = entry->second;
|
||||
if (is_cache_miss) {
|
||||
program = TryLoadProgram(primitive_mode, base_bindings);
|
||||
program = TryLoadProgram(variant);
|
||||
if (!program) {
|
||||
program =
|
||||
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
|
||||
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
|
||||
program = SpecializeShader(code, entries, program_type, variant);
|
||||
disk_cache.SaveUsage(GetUsage(variant));
|
||||
}
|
||||
|
||||
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
|
||||
@@ -278,6 +292,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
|
||||
handle = program->handle;
|
||||
}
|
||||
|
||||
auto base_bindings{variant.base_bindings};
|
||||
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
|
||||
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
|
||||
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
|
||||
@@ -285,43 +300,42 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
|
||||
return {handle, base_bindings};
|
||||
}
|
||||
|
||||
GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
|
||||
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
|
||||
GLuint CachedShader::GetGeometryShader(const ProgramVariant& variant) {
|
||||
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(variant);
|
||||
auto& programs = entry->second;
|
||||
|
||||
switch (primitive_mode) {
|
||||
switch (variant.primitive_mode) {
|
||||
case GL_POINTS:
|
||||
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
|
||||
return LazyGeometryProgram(programs.points, variant);
|
||||
case GL_LINES:
|
||||
case GL_LINE_STRIP:
|
||||
return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
|
||||
return LazyGeometryProgram(programs.lines, variant);
|
||||
case GL_LINES_ADJACENCY:
|
||||
case GL_LINE_STRIP_ADJACENCY:
|
||||
return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
|
||||
return LazyGeometryProgram(programs.lines_adjacency, variant);
|
||||
case GL_TRIANGLES:
|
||||
case GL_TRIANGLE_STRIP:
|
||||
case GL_TRIANGLE_FAN:
|
||||
return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
|
||||
return LazyGeometryProgram(programs.triangles, variant);
|
||||
case GL_TRIANGLES_ADJACENCY:
|
||||
case GL_TRIANGLE_STRIP_ADJACENCY:
|
||||
return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
|
||||
return LazyGeometryProgram(programs.triangles_adjacency, variant);
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown primitive mode.");
|
||||
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
|
||||
return LazyGeometryProgram(programs.points, variant);
|
||||
}
|
||||
}
|
||||
|
||||
GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
|
||||
GLenum primitive_mode) {
|
||||
GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program,
|
||||
const ProgramVariant& variant) {
|
||||
if (target_program) {
|
||||
return target_program->handle;
|
||||
}
|
||||
const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
|
||||
target_program = TryLoadProgram(primitive_mode, base_bindings);
|
||||
const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(variant.primitive_mode);
|
||||
target_program = TryLoadProgram(variant);
|
||||
if (!target_program) {
|
||||
target_program =
|
||||
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
|
||||
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
|
||||
target_program = SpecializeShader(code, entries, program_type, variant);
|
||||
disk_cache.SaveUsage(GetUsage(variant));
|
||||
}
|
||||
|
||||
LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
|
||||
@@ -329,18 +343,19 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
|
||||
return target_program->handle;
|
||||
};
|
||||
|
||||
CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
|
||||
BaseBindings base_bindings) const {
|
||||
const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
|
||||
CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
|
||||
const auto found = precompiled_programs.find(GetUsage(variant));
|
||||
if (found == precompiled_programs.end()) {
|
||||
return {};
|
||||
}
|
||||
return found->second;
|
||||
}
|
||||
|
||||
ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
|
||||
BaseBindings base_bindings) const {
|
||||
return {unique_identifier, base_bindings, primitive_mode};
|
||||
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const {
|
||||
ShaderDiskCacheUsage usage;
|
||||
usage.unique_identifier = unique_identifier;
|
||||
usage.variant = variant;
|
||||
return usage;
|
||||
}
|
||||
|
||||
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||
@@ -394,7 +409,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||
}
|
||||
if (!shader) {
|
||||
shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
|
||||
usage.bindings, usage.primitive, true);
|
||||
usage.variant, true);
|
||||
}
|
||||
precompiled_programs.insert({usage, std::move(shader)});
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <bitset>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
@@ -22,7 +23,7 @@
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
} // namespace Core
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
@@ -63,8 +64,7 @@ public:
|
||||
}
|
||||
|
||||
/// Gets the GL program handle for the shader
|
||||
std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
|
||||
BaseBindings base_bindings);
|
||||
std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
|
||||
|
||||
private:
|
||||
// Geometry programs. These are needed because GLSL needs an input topology but it's not
|
||||
@@ -78,15 +78,14 @@ private:
|
||||
CachedProgram triangles_adjacency;
|
||||
};
|
||||
|
||||
GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
|
||||
GLuint GetGeometryShader(const ProgramVariant& variant);
|
||||
|
||||
/// Generates a geometry shader or returns one that already exists.
|
||||
GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
|
||||
GLenum primitive_mode);
|
||||
GLuint LazyGeometryProgram(CachedProgram& target_program, const ProgramVariant& variant);
|
||||
|
||||
CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
|
||||
CachedProgram TryLoadProgram(const ProgramVariant& variant) const;
|
||||
|
||||
ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
|
||||
ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
|
||||
|
||||
u8* host_ptr{};
|
||||
VAddr cpu_addr{};
|
||||
@@ -100,8 +99,8 @@ private:
|
||||
|
||||
std::string code;
|
||||
|
||||
std::unordered_map<BaseBindings, CachedProgram> programs;
|
||||
std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
|
||||
std::unordered_map<ProgramVariant, CachedProgram> programs;
|
||||
std::unordered_map<ProgramVariant, GeometryPrograms> geometry_programs;
|
||||
|
||||
std::unordered_map<u32, GLuint> cbuf_resource_cache;
|
||||
std::unordered_map<u32, GLuint> gmem_resource_cache;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -27,6 +27,7 @@ struct ShaderEntries;
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using ProgramResult = std::pair<std::string, ShaderEntries>;
|
||||
using SamplerEntry = VideoCommon::Shader::Sampler;
|
||||
using ImageEntry = VideoCommon::Shader::Image;
|
||||
|
||||
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
|
||||
public:
|
||||
@@ -74,6 +75,7 @@ struct ShaderEntries {
|
||||
std::vector<ConstBufferEntry> const_buffers;
|
||||
std::vector<SamplerEntry> samplers;
|
||||
std::vector<SamplerEntry> bindless_samplers;
|
||||
std::vector<ImageEntry> images;
|
||||
std::vector<GlobalMemoryEntry> global_memory_entries;
|
||||
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
||||
std::size_t shader_length{};
|
||||
|
||||
@@ -34,11 +34,11 @@ enum class PrecompiledEntryKind : u32 {
|
||||
Dump,
|
||||
};
|
||||
|
||||
constexpr u32 NativeVersion = 1;
|
||||
constexpr u32 NativeVersion = 3;
|
||||
|
||||
// Making sure sizes doesn't change by accident
|
||||
static_assert(sizeof(BaseBindings) == 12);
|
||||
static_assert(sizeof(ShaderDiskCacheUsage) == 24);
|
||||
static_assert(sizeof(BaseBindings) == 16);
|
||||
static_assert(sizeof(ShaderDiskCacheUsage) == 40);
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -104,9 +104,8 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
|
||||
|
||||
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
|
||||
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
|
||||
: system{system}, precompiled_cache_virtual_file_offset{0} {}
|
||||
|
||||
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
|
||||
ShaderDiskCacheOpenGL::LoadTransferable() {
|
||||
@@ -244,7 +243,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
|
||||
return {};
|
||||
}
|
||||
|
||||
auto entry = LoadDecompiledEntry();
|
||||
const auto entry = LoadDecompiledEntry();
|
||||
if (!entry) {
|
||||
return {};
|
||||
}
|
||||
@@ -287,82 +286,97 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
|
||||
if (!LoadObjectFromPrecompiled(code_size)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string code(code_size, '\0');
|
||||
std::vector<u8> code(code_size);
|
||||
if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
|
||||
return {};
|
||||
}
|
||||
|
||||
ShaderDiskCacheDecompiled entry;
|
||||
entry.code = std::move(code);
|
||||
entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
|
||||
|
||||
u32 const_buffers_count{};
|
||||
if (!LoadObjectFromPrecompiled(const_buffers_count)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < const_buffers_count; ++i) {
|
||||
u32 max_offset{};
|
||||
u32 index{};
|
||||
bool is_indirect{};
|
||||
u8 is_indirect{};
|
||||
if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
|
||||
!LoadObjectFromPrecompiled(is_indirect)) {
|
||||
return {};
|
||||
}
|
||||
entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index);
|
||||
entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
|
||||
}
|
||||
|
||||
u32 samplers_count{};
|
||||
if (!LoadObjectFromPrecompiled(samplers_count)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < samplers_count; ++i) {
|
||||
u64 offset{};
|
||||
u64 index{};
|
||||
u32 type{};
|
||||
bool is_array{};
|
||||
bool is_shadow{};
|
||||
bool is_bindless{};
|
||||
u8 is_array{};
|
||||
u8 is_shadow{};
|
||||
u8 is_bindless{};
|
||||
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
|
||||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
|
||||
!LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
|
||||
return {};
|
||||
}
|
||||
entry.entries.samplers.emplace_back(
|
||||
entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
|
||||
static_cast<std::size_t>(index),
|
||||
static_cast<Tegra::Shader::TextureType>(type),
|
||||
is_array != 0, is_shadow != 0, is_bindless != 0);
|
||||
}
|
||||
|
||||
u32 images_count{};
|
||||
if (!LoadObjectFromPrecompiled(images_count)) {
|
||||
return {};
|
||||
}
|
||||
for (u32 i = 0; i < images_count; ++i) {
|
||||
u64 offset{};
|
||||
u64 index{};
|
||||
u32 type{};
|
||||
u8 is_bindless{};
|
||||
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
|
||||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) {
|
||||
return {};
|
||||
}
|
||||
entry.entries.images.emplace_back(
|
||||
static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
|
||||
static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
|
||||
static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0);
|
||||
}
|
||||
|
||||
u32 global_memory_count{};
|
||||
if (!LoadObjectFromPrecompiled(global_memory_count)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < global_memory_count; ++i) {
|
||||
u32 cbuf_index{};
|
||||
u32 cbuf_offset{};
|
||||
bool is_read{};
|
||||
bool is_written{};
|
||||
u8 is_read{};
|
||||
u8 is_written{};
|
||||
if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
|
||||
!LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
|
||||
return {};
|
||||
}
|
||||
entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
|
||||
is_written);
|
||||
entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
|
||||
is_written != 0);
|
||||
}
|
||||
|
||||
for (auto& clip_distance : entry.entries.clip_distances) {
|
||||
if (!LoadObjectFromPrecompiled(clip_distance)) {
|
||||
u8 clip_distance_raw{};
|
||||
if (!LoadObjectFromPrecompiled(clip_distance_raw))
|
||||
return {};
|
||||
}
|
||||
clip_distance = clip_distance_raw != 0;
|
||||
}
|
||||
|
||||
u64 shader_length{};
|
||||
if (!LoadObjectFromPrecompiled(shader_length)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
entry.entries.shader_length = static_cast<std::size_t>(shader_length);
|
||||
|
||||
return entry;
|
||||
@@ -383,7 +397,7 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
|
||||
for (const auto& cbuf : entries.const_buffers) {
|
||||
if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
|
||||
!SaveObjectToPrecompiled(cbuf.IsIndirect())) {
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -395,9 +409,21 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
|
||||
if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
|
||||
!SaveObjectToPrecompiled(sampler.IsArray()) ||
|
||||
!SaveObjectToPrecompiled(sampler.IsShadow()) ||
|
||||
!SaveObjectToPrecompiled(sampler.IsBindless())) {
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
|
||||
return false;
|
||||
}
|
||||
for (const auto& image : entries.images) {
|
||||
if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -408,13 +434,14 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
|
||||
for (const auto& gmem : entries.global_memory_entries) {
|
||||
if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
|
||||
!SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (const bool clip_distance : entries.clip_distances) {
|
||||
if (!SaveObjectToPrecompiled(clip_distance)) {
|
||||
if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,14 +33,18 @@ namespace OpenGL {
|
||||
using ProgramCode = std::vector<u64>;
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
/// Allocated bindings used by an OpenGL shader program
|
||||
using TextureBufferUsage = std::bitset<64>;
|
||||
|
||||
/// Allocated bindings used by an OpenGL shader program.
|
||||
struct BaseBindings {
|
||||
u32 cbuf{};
|
||||
u32 gmem{};
|
||||
u32 sampler{};
|
||||
u32 image{};
|
||||
|
||||
bool operator==(const BaseBindings& rhs) const {
|
||||
return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
|
||||
return std::tie(cbuf, gmem, sampler, image) ==
|
||||
std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image);
|
||||
}
|
||||
|
||||
bool operator!=(const BaseBindings& rhs) const {
|
||||
@@ -48,15 +52,29 @@ struct BaseBindings {
|
||||
}
|
||||
};
|
||||
|
||||
/// Describes how a shader is used
|
||||
/// Describes the different variants a single program can be compiled.
|
||||
struct ProgramVariant {
|
||||
BaseBindings base_bindings;
|
||||
GLenum primitive_mode{};
|
||||
TextureBufferUsage texture_buffer_usage{};
|
||||
|
||||
bool operator==(const ProgramVariant& rhs) const {
|
||||
return std::tie(base_bindings, primitive_mode, texture_buffer_usage) ==
|
||||
std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage);
|
||||
}
|
||||
|
||||
bool operator!=(const ProgramVariant& rhs) const {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
};
|
||||
|
||||
/// Describes how a shader is used.
|
||||
struct ShaderDiskCacheUsage {
|
||||
u64 unique_identifier{};
|
||||
BaseBindings bindings;
|
||||
GLenum primitive{};
|
||||
ProgramVariant variant;
|
||||
|
||||
bool operator==(const ShaderDiskCacheUsage& rhs) const {
|
||||
return std::tie(unique_identifier, bindings, primitive) ==
|
||||
std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive);
|
||||
return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant);
|
||||
}
|
||||
|
||||
bool operator!=(const ShaderDiskCacheUsage& rhs) const {
|
||||
@@ -70,16 +88,28 @@ namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<OpenGL::BaseBindings> {
|
||||
std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
|
||||
return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
|
||||
std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
|
||||
return static_cast<std::size_t>(bindings.cbuf) ^
|
||||
(static_cast<std::size_t>(bindings.gmem) << 8) ^
|
||||
(static_cast<std::size_t>(bindings.sampler) << 16) ^
|
||||
(static_cast<std::size_t>(bindings.image) << 24);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<OpenGL::ProgramVariant> {
|
||||
std::size_t operator()(const OpenGL::ProgramVariant& variant) const {
|
||||
return std::hash<OpenGL::BaseBindings>()(variant.base_bindings) ^
|
||||
std::hash<OpenGL::TextureBufferUsage>()(variant.texture_buffer_usage) ^
|
||||
(static_cast<std::size_t>(variant.primitive_mode) << 6);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<OpenGL::ShaderDiskCacheUsage> {
|
||||
std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
|
||||
std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
|
||||
return static_cast<std::size_t>(usage.unique_identifier) ^
|
||||
std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
|
||||
std::hash<OpenGL::ProgramVariant>()(usage.variant);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -162,7 +192,6 @@ struct ShaderDiskCacheDump {
|
||||
class ShaderDiskCacheOpenGL {
|
||||
public:
|
||||
explicit ShaderDiskCacheOpenGL(Core::System& system);
|
||||
~ShaderDiskCacheOpenGL();
|
||||
|
||||
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
|
||||
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
|
||||
@@ -260,35 +289,21 @@ private:
|
||||
return SaveArrayToPrecompiled(&object, 1);
|
||||
}
|
||||
|
||||
bool SaveObjectToPrecompiled(bool object) {
|
||||
const auto value = static_cast<u8>(object);
|
||||
return SaveArrayToPrecompiled(&value, 1);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool LoadObjectFromPrecompiled(T& object) {
|
||||
return LoadArrayFromPrecompiled(&object, 1);
|
||||
}
|
||||
|
||||
bool LoadObjectFromPrecompiled(bool& object) {
|
||||
u8 value;
|
||||
const bool read_ok = LoadArrayFromPrecompiled(&value, 1);
|
||||
if (!read_ok) {
|
||||
return false;
|
||||
}
|
||||
|
||||
object = value != 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Core system
|
||||
Core::System& system;
|
||||
// Stored transferable shaders
|
||||
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
|
||||
// Stores whole precompiled cache which will be read from/saved to the precompiled cache file
|
||||
|
||||
// Stores whole precompiled cache which will be read from or saved to the precompiled chache
|
||||
// file
|
||||
FileSys::VectorVfsFile precompiled_cache_virtual_file;
|
||||
// Stores the current offset of the precompiled cache file for IO purposes
|
||||
std::size_t precompiled_cache_virtual_file_offset = 0;
|
||||
std::size_t precompiled_cache_virtual_file_offset;
|
||||
|
||||
// Stored transferable shaders
|
||||
std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
|
||||
|
||||
// The cache has been loaded at boot
|
||||
bool tried_to_load{};
|
||||
|
||||
@@ -33,14 +33,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
|
||||
};
|
||||
|
||||
)";
|
||||
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
|
||||
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
|
||||
ProgramResult program =
|
||||
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
|
||||
|
||||
out += program.first;
|
||||
|
||||
if (setup.IsDualProgram()) {
|
||||
const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
|
||||
ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
|
||||
ProgramResult program_b =
|
||||
Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
|
||||
|
||||
@@ -76,7 +76,7 @@ void main() {
|
||||
}
|
||||
})";
|
||||
|
||||
return {std::move(out), std::move(program.second)};
|
||||
return {out, program.second};
|
||||
}
|
||||
|
||||
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
|
||||
@@ -97,7 +97,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
|
||||
};
|
||||
|
||||
)";
|
||||
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
|
||||
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
|
||||
ProgramResult program =
|
||||
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
|
||||
out += program.first;
|
||||
@@ -107,7 +107,7 @@ void main() {
|
||||
execute_geometry();
|
||||
};)";
|
||||
|
||||
return {std::move(out), std::move(program.second)};
|
||||
return {out, program.second};
|
||||
}
|
||||
|
||||
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
|
||||
@@ -160,7 +160,7 @@ bool AlphaFunc(in float value) {
|
||||
}
|
||||
|
||||
)";
|
||||
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
|
||||
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
|
||||
ProgramResult program =
|
||||
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
|
||||
|
||||
@@ -172,7 +172,7 @@ void main() {
|
||||
}
|
||||
|
||||
)";
|
||||
return {std::move(out), std::move(program.second)};
|
||||
return {out, program.second};
|
||||
}
|
||||
|
||||
} // namespace OpenGL::GLShader
|
||||
|
||||
@@ -15,7 +15,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent)
|
||||
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent,
|
||||
bool use_persistent)
|
||||
: buffer_size(size) {
|
||||
gl_buffer.Create();
|
||||
|
||||
@@ -29,7 +30,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
|
||||
allocate_size *= 2;
|
||||
}
|
||||
|
||||
if (GLAD_GL_ARB_buffer_storage) {
|
||||
if (use_persistent) {
|
||||
persistent = true;
|
||||
coherent = prefer_coherent;
|
||||
const GLbitfield flags =
|
||||
|
||||
@@ -13,7 +13,8 @@ namespace OpenGL {
|
||||
|
||||
class OGLStreamBuffer : private NonCopyable {
|
||||
public:
|
||||
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false);
|
||||
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false,
|
||||
bool use_persistent = true);
|
||||
~OGLStreamBuffer();
|
||||
|
||||
GLuint GetHandle() const;
|
||||
|
||||
@@ -126,8 +126,6 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
|
||||
return GL_TRIANGLES;
|
||||
case Maxwell::PrimitiveTopology::TriangleStrip:
|
||||
return GL_TRIANGLE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::TriangleFan:
|
||||
return GL_TRIANGLE_FAN;
|
||||
default:
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
|
||||
UNREACHABLE();
|
||||
|
||||
@@ -472,7 +472,6 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize the renderer
|
||||
bool RendererOpenGL::Init() {
|
||||
Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};
|
||||
|
||||
|
||||
@@ -194,8 +194,8 @@ public:
|
||||
for (const auto& sampler : ir.GetSamplers()) {
|
||||
entries.samplers.emplace_back(sampler);
|
||||
}
|
||||
for (const auto& attribute : ir.GetInputAttributes()) {
|
||||
entries.attributes.insert(GetGenericAttributeLocation(attribute));
|
||||
for (const auto& attr : ir.GetInputAttributes()) {
|
||||
entries.attributes.insert(GetGenericAttributeLocation(attr.first));
|
||||
}
|
||||
entries.clip_distances = ir.GetClipDistances();
|
||||
entries.shader_length = ir.GetLength();
|
||||
@@ -321,7 +321,8 @@ private:
|
||||
}
|
||||
|
||||
void DeclareInputAttributes() {
|
||||
for (const auto index : ir.GetInputAttributes()) {
|
||||
for (const auto element : ir.GetInputAttributes()) {
|
||||
const Attribute::Index index = element.first;
|
||||
if (!IsGenericAttribute(index)) {
|
||||
continue;
|
||||
}
|
||||
@@ -929,6 +930,11 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
Id ImageStore(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
|
||||
Id Branch(Operation operation) {
|
||||
const auto target = std::get_if<ImmediateNode>(operation[0]);
|
||||
UNIMPLEMENTED_IF(!target);
|
||||
@@ -1281,6 +1287,8 @@ private:
|
||||
&SPIRVDecompiler::TextureQueryLod,
|
||||
&SPIRVDecompiler::TexelFetch,
|
||||
|
||||
&SPIRVDecompiler::ImageStore,
|
||||
|
||||
&SPIRVDecompiler::Branch,
|
||||
&SPIRVDecompiler::PushFlowStack,
|
||||
&SPIRVDecompiler::PopFlowStack,
|
||||
|
||||
@@ -168,6 +168,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
|
||||
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
|
||||
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
|
||||
{OpCode::Type::Texture, &ShaderIR::DecodeTexture},
|
||||
{OpCode::Type::Image, &ShaderIR::DecodeImage},
|
||||
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
|
||||
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
|
||||
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
@@ -153,4 +152,4 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
@@ -48,4 +47,4 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -49,4 +49,4 @@ u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -93,4 +93,4 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -46,4 +46,4 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -38,4 +38,4 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -56,4 +56,4 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -55,4 +55,4 @@ u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -53,4 +53,4 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
@@ -65,4 +64,4 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -59,4 +59,4 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
115
src/video_core/shader/decode/image.cpp
Normal file
115
src/video_core/shader/decode/image.cpp
Normal file
@@ -0,0 +1,115 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
namespace {
|
||||
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
|
||||
switch (image_type) {
|
||||
case Tegra::Shader::ImageType::Texture1D:
|
||||
case Tegra::Shader::ImageType::TextureBuffer:
|
||||
return 1;
|
||||
case Tegra::Shader::ImageType::Texture1DArray:
|
||||
case Tegra::Shader::ImageType::Texture2D:
|
||||
return 2;
|
||||
case Tegra::Shader::ImageType::Texture2DArray:
|
||||
case Tegra::Shader::ImageType::Texture3D:
|
||||
return 3;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return 1;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::SUST: {
|
||||
UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P);
|
||||
UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer);
|
||||
UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore);
|
||||
UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store
|
||||
|
||||
std::vector<Node> values;
|
||||
constexpr std::size_t hardcoded_size{4};
|
||||
for (std::size_t i = 0; i < hardcoded_size; ++i) {
|
||||
values.push_back(GetRegister(instr.gpr0.Value() + i));
|
||||
}
|
||||
|
||||
std::vector<Node> coords;
|
||||
const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
|
||||
for (std::size_t i = 0; i < num_coords; ++i) {
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + i));
|
||||
}
|
||||
|
||||
const auto type{instr.sust.image_type};
|
||||
const auto& image{instr.sust.is_immediate ? GetImage(instr.image, type)
|
||||
: GetBindlessImage(instr.gpr39, type)};
|
||||
MetaImage meta{image, values};
|
||||
const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))};
|
||||
bb.push_back(store);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
|
||||
}
|
||||
|
||||
return pc;
|
||||
}
|
||||
|
||||
const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
|
||||
const auto offset{static_cast<std::size_t>(image.index.Value())};
|
||||
|
||||
// If this image has already been used, return the existing mapping.
|
||||
const auto itr{std::find_if(used_images.begin(), used_images.end(),
|
||||
[=](const Image& entry) { return entry.GetOffset() == offset; })};
|
||||
if (itr != used_images.end()) {
|
||||
ASSERT(itr->GetType() == type);
|
||||
return *itr;
|
||||
}
|
||||
|
||||
// Otherwise create a new mapping for this image.
|
||||
const std::size_t next_index{used_images.size()};
|
||||
const Image entry{offset, next_index, type};
|
||||
return *used_images.emplace(entry).first;
|
||||
}
|
||||
|
||||
const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
|
||||
Tegra::Shader::ImageType type) {
|
||||
const Node image_register{GetRegister(reg)};
|
||||
const Node base_image{
|
||||
TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
|
||||
const auto cbuf{std::get_if<CbufNode>(base_image)};
|
||||
const auto cbuf_offset_imm{std::get_if<ImmediateNode>(cbuf->GetOffset())};
|
||||
const auto cbuf_offset{cbuf_offset_imm->GetValue()};
|
||||
const auto cbuf_index{cbuf->GetIndex()};
|
||||
const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
|
||||
|
||||
// If this image has already been used, return the existing mapping.
|
||||
const auto itr{std::find_if(used_images.begin(), used_images.end(),
|
||||
[=](const Image& entry) { return entry.GetOffset() == cbuf_key; })};
|
||||
if (itr != used_images.end()) {
|
||||
ASSERT(itr->GetType() == type);
|
||||
return *itr;
|
||||
}
|
||||
|
||||
// Otherwise create a new mapping for this image.
|
||||
const std::size_t next_index{used_images.size()};
|
||||
const Image entry{cbuf_index, cbuf_offset, next_index, type};
|
||||
return *used_images.emplace(entry).first;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -2,6 +2,7 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
@@ -46,4 +47,4 @@ u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -50,4 +50,4 @@ u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -47,20 +47,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
"Indirect attribute loads are not supported");
|
||||
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
|
||||
"Unaligned attribute loads are not supported");
|
||||
UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
|
||||
instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
|
||||
"Non-32 bits PHYS reads are not implemented");
|
||||
|
||||
const Node buffer{GetRegister(instr.gpr39)};
|
||||
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
|
||||
Tegra::Shader::IpaSampleMode::Default};
|
||||
|
||||
u64 next_element = instr.attribute.fmt20.element;
|
||||
auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
|
||||
|
||||
const auto LoadNextElement = [&](u32 reg_offset) {
|
||||
const Node attribute{instr.attribute.fmt20.IsPhysical()
|
||||
? GetPhysicalInputAttribute(instr.gpr8, buffer)
|
||||
: GetInputAttribute(static_cast<Attribute::Index>(next_index),
|
||||
next_element, buffer)};
|
||||
const Node buffer = GetRegister(instr.gpr39);
|
||||
const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
|
||||
next_element, input_mode, buffer);
|
||||
|
||||
SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
|
||||
|
||||
@@ -146,25 +143,12 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::LD:
|
||||
case OpCode::Id::LDG: {
|
||||
const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::LD:
|
||||
UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
|
||||
return instr.generic.type;
|
||||
case OpCode::Id::LDG:
|
||||
return instr.ldg.type;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
}();
|
||||
|
||||
const auto [real_address_base, base_address, descriptor] =
|
||||
TrackAndGetGlobalMemory(bb, instr, false);
|
||||
TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
|
||||
static_cast<u32>(instr.ldg.immediate_offset.Value()), false);
|
||||
|
||||
const u32 count = GetUniformTypeElementsCount(type);
|
||||
const u32 count = GetUniformTypeElementsCount(instr.ldg.type);
|
||||
for (u32 i = 0; i < count; ++i) {
|
||||
const Node it_offset = Immediate(i * 4);
|
||||
const Node real_address =
|
||||
@@ -178,6 +162,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::STG: {
|
||||
const auto [real_address_base, base_address, descriptor] =
|
||||
TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
|
||||
static_cast<u32>(instr.stg.immediate_offset.Value()), true);
|
||||
|
||||
// Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
|
||||
SetTemporal(bb, 0, real_address_base);
|
||||
|
||||
const u32 count = GetUniformTypeElementsCount(instr.stg.type);
|
||||
for (u32 i = 0; i < count; ++i) {
|
||||
SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
|
||||
}
|
||||
for (u32 i = 0; i < count; ++i) {
|
||||
const Node it_offset = Immediate(i * 4);
|
||||
const Node real_address =
|
||||
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
|
||||
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
|
||||
|
||||
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::ST_A: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
|
||||
"Indirect attribute loads are not supported");
|
||||
@@ -233,56 +239,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::ST:
|
||||
case OpCode::Id::STG: {
|
||||
const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::ST:
|
||||
UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
|
||||
return instr.generic.type;
|
||||
case OpCode::Id::STG:
|
||||
return instr.stg.type;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
}();
|
||||
|
||||
const auto [real_address_base, base_address, descriptor] =
|
||||
TrackAndGetGlobalMemory(bb, instr, true);
|
||||
|
||||
// Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
|
||||
SetTemporal(bb, 0, real_address_base);
|
||||
|
||||
const u32 count = GetUniformTypeElementsCount(type);
|
||||
for (u32 i = 0; i < count; ++i) {
|
||||
SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
|
||||
}
|
||||
for (u32 i = 0; i < count; ++i) {
|
||||
const Node it_offset = Immediate(i * 4);
|
||||
const Node real_address =
|
||||
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
|
||||
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
|
||||
|
||||
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::AL2P: {
|
||||
// Ignore al2p.direction since we don't care about it.
|
||||
|
||||
// Calculate emulation fake physical address.
|
||||
const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
|
||||
const Node reg{GetRegister(instr.gpr8)};
|
||||
const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
|
||||
|
||||
// Set the fake address to target register.
|
||||
SetRegister(bb, instr.gpr0, fake_address);
|
||||
|
||||
// Signal the shader IR to declare all possible attributes and varyings
|
||||
uses_physical_attributes = true;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
|
||||
}
|
||||
@@ -291,11 +247,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
}
|
||||
|
||||
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
|
||||
Instruction instr,
|
||||
Node addr_register,
|
||||
u32 immediate_offset,
|
||||
bool is_write) {
|
||||
const auto addr_register{GetRegister(instr.gmem.gpr)};
|
||||
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
|
||||
|
||||
const Node base_address{
|
||||
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
|
||||
const auto cbuf = std::get_if<CbufNode>(base_address);
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
@@ -131,18 +130,15 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::IPA: {
|
||||
const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
|
||||
|
||||
const auto attribute = instr.attribute.fmt28;
|
||||
const auto& attribute = instr.attribute.fmt28;
|
||||
const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
|
||||
instr.ipa.sample_mode.Value()};
|
||||
|
||||
Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
|
||||
: GetInputAttribute(attribute.index, attribute.element);
|
||||
const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
|
||||
Node value = attr;
|
||||
const Tegra::Shader::Attribute::Index index = attribute.index.Value();
|
||||
const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
|
||||
index <= Tegra::Shader::Attribute::Index::Attribute_31;
|
||||
if (is_generic || is_physical) {
|
||||
if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
|
||||
index <= Tegra::Shader::Attribute::Index::Attribute_31) {
|
||||
// TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
|
||||
// In theory by setting them as perspective, OpenGL does the perspective correction.
|
||||
// A way must figured to reverse the last step of it.
|
||||
|
||||
@@ -64,4 +64,4 @@ u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -43,4 +43,4 @@ u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -48,4 +48,4 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -52,4 +52,4 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -244,6 +244,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLD: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
|
||||
|
||||
if (instr.tld.nodep_flag) {
|
||||
LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLDS: {
|
||||
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
|
||||
const bool is_array{instr.tlds.IsArrayTexture()};
|
||||
@@ -574,6 +586,38 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
|
||||
return values;
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
|
||||
const auto texture_type{instr.tld.texture_type};
|
||||
const bool is_array{instr.tld.is_array};
|
||||
const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
|
||||
const std::size_t coord_count{GetCoordCount(texture_type)};
|
||||
|
||||
u64 gpr8_cursor{instr.gpr8.Value()};
|
||||
const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr};
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
coords.push_back(GetRegister(gpr8_cursor++));
|
||||
}
|
||||
|
||||
u64 gpr20_cursor{instr.gpr20.Value()};
|
||||
// const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr};
|
||||
const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)};
|
||||
// const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
|
||||
// const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element};
|
||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
|
||||
const std::size_t type_coord_count = GetCoordCount(texture_type);
|
||||
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
|
||||
|
||||
@@ -108,4 +108,4 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -21,13 +21,6 @@ using Tegra::Shader::PredCondition;
|
||||
using Tegra::Shader::PredOperation;
|
||||
using Tegra::Shader::Register;
|
||||
|
||||
ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset)
|
||||
: program_code{program_code}, main_offset{main_offset} {
|
||||
Decode();
|
||||
}
|
||||
|
||||
ShaderIR::~ShaderIR() = default;
|
||||
|
||||
Node ShaderIR::StoreNode(NodeData&& node_data) {
|
||||
auto store = std::make_unique<NodeData>(node_data);
|
||||
const Node node = store.get();
|
||||
@@ -39,8 +32,8 @@ Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) {
|
||||
return StoreNode(ConditionalNode(condition, std::move(code)));
|
||||
}
|
||||
|
||||
Node ShaderIR::Comment(std::string text) {
|
||||
return StoreNode(CommentNode(std::move(text)));
|
||||
Node ShaderIR::Comment(const std::string& text) {
|
||||
return StoreNode(CommentNode(text));
|
||||
}
|
||||
|
||||
Node ShaderIR::Immediate(u32 value) {
|
||||
@@ -96,14 +89,13 @@ Node ShaderIR::GetPredicate(bool immediate) {
|
||||
return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
|
||||
}
|
||||
|
||||
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
|
||||
used_input_attributes.emplace(index);
|
||||
return StoreNode(AbufNode(index, static_cast<u32>(element), buffer));
|
||||
}
|
||||
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element,
|
||||
const Tegra::Shader::IpaMode& input_mode, Node buffer) {
|
||||
const auto [entry, is_new] =
|
||||
used_input_attributes.emplace(std::make_pair(index, std::set<Tegra::Shader::IpaMode>{}));
|
||||
entry->second.insert(input_mode);
|
||||
|
||||
Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
|
||||
uses_physical_attributes = true;
|
||||
return StoreNode(AbufNode(GetRegister(physical_address), buffer));
|
||||
return StoreNode(AbufNode(index, static_cast<u32>(element), input_mode, buffer));
|
||||
}
|
||||
|
||||
Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
|
||||
|
||||
@@ -172,6 +172,8 @@ enum class OperationCode {
|
||||
TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
|
||||
TexelFetch, /// (MetaTexture, int[N], int) -> float4
|
||||
|
||||
ImageStore, /// (MetaImage, float[N] coords) -> void
|
||||
|
||||
Branch, /// (uint branch_target) -> void
|
||||
PushFlowStack, /// (uint branch_target) -> void
|
||||
PopFlowStack, /// () -> void
|
||||
@@ -267,6 +269,48 @@ private:
|
||||
bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
|
||||
};
|
||||
|
||||
class Image {
|
||||
public:
|
||||
explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type)
|
||||
: offset{offset}, index{index}, type{type}, is_bindless{false} {}
|
||||
|
||||
explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
|
||||
Tegra::Shader::ImageType type)
|
||||
: offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
|
||||
is_bindless{true} {}
|
||||
|
||||
explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
|
||||
bool is_bindless)
|
||||
: offset{offset}, index{index}, type{type}, is_bindless{is_bindless} {}
|
||||
|
||||
std::size_t GetOffset() const {
|
||||
return offset;
|
||||
}
|
||||
|
||||
std::size_t GetIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
Tegra::Shader::ImageType GetType() const {
|
||||
return type;
|
||||
}
|
||||
|
||||
bool IsBindless() const {
|
||||
return is_bindless;
|
||||
}
|
||||
|
||||
bool operator<(const Image& rhs) const {
|
||||
return std::tie(offset, index, type, is_bindless) <
|
||||
std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless);
|
||||
}
|
||||
|
||||
private:
|
||||
std::size_t offset{};
|
||||
std::size_t index{};
|
||||
Tegra::Shader::ImageType type{};
|
||||
bool is_bindless{};
|
||||
};
|
||||
|
||||
class ConstBuffer {
|
||||
public:
|
||||
explicit ConstBuffer(u32 max_offset, bool is_indirect)
|
||||
@@ -328,31 +372,45 @@ struct MetaTexture {
|
||||
u32 element{};
|
||||
};
|
||||
|
||||
constexpr MetaArithmetic PRECISE = {true};
|
||||
constexpr MetaArithmetic NO_PRECISE = {false};
|
||||
struct MetaImage {
|
||||
const Image& image;
|
||||
std::vector<Node> values;
|
||||
};
|
||||
|
||||
using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
|
||||
inline constexpr MetaArithmetic PRECISE = {true};
|
||||
inline constexpr MetaArithmetic NO_PRECISE = {false};
|
||||
|
||||
using Meta = std::variant<MetaArithmetic, MetaTexture, MetaImage, Tegra::Shader::HalfType>;
|
||||
|
||||
/// Holds any kind of operation that can be done in the IR
|
||||
class OperationNode final {
|
||||
public:
|
||||
explicit OperationNode(OperationCode code) : code{code} {}
|
||||
|
||||
explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {}
|
||||
template <typename... T>
|
||||
explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {}
|
||||
|
||||
template <typename... T>
|
||||
explicit OperationNode(OperationCode code, const T*... operands)
|
||||
explicit constexpr OperationNode(OperationCode code, Meta&& meta)
|
||||
: code{code}, meta{std::move(meta)} {}
|
||||
|
||||
template <typename... T>
|
||||
explicit constexpr OperationNode(OperationCode code, const T*... operands)
|
||||
: OperationNode(code, {}, operands...) {}
|
||||
|
||||
template <typename... T>
|
||||
explicit OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
|
||||
: code{code}, meta{std::move(meta)}, operands{operands_...} {}
|
||||
explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
|
||||
: code{code}, meta{std::move(meta)} {
|
||||
|
||||
auto operands_list = {operands_...};
|
||||
for (auto& operand : operands_list) {
|
||||
operands.push_back(operand);
|
||||
}
|
||||
}
|
||||
|
||||
explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands)
|
||||
: code{code}, meta{meta}, operands{std::move(operands)} {}
|
||||
|
||||
explicit OperationNode(OperationCode code, std::vector<Node>&& operands)
|
||||
: code{code}, operands{std::move(operands)} {}
|
||||
: code{code}, meta{}, operands{std::move(operands)} {}
|
||||
|
||||
OperationCode GetCode() const {
|
||||
return code;
|
||||
@@ -456,14 +514,17 @@ private:
|
||||
/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
|
||||
class AbufNode final {
|
||||
public:
|
||||
// Initialize for standard attributes (index is explicit).
|
||||
explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
|
||||
const Tegra::Shader::IpaMode& input_mode, Node buffer = {})
|
||||
: input_mode{input_mode}, buffer{buffer}, index{index}, element{element} {}
|
||||
|
||||
explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
|
||||
Node buffer = {})
|
||||
: buffer{buffer}, index{index}, element{element} {}
|
||||
: input_mode{}, buffer{buffer}, index{index}, element{element} {}
|
||||
|
||||
// Initialize for physical attributes (index is a variable value).
|
||||
explicit constexpr AbufNode(Node physical_address, Node buffer = {})
|
||||
: physical_address{physical_address}, buffer{buffer} {}
|
||||
Tegra::Shader::IpaMode GetInputMode() const {
|
||||
return input_mode;
|
||||
}
|
||||
|
||||
Tegra::Shader::Attribute::Index GetIndex() const {
|
||||
return index;
|
||||
@@ -477,19 +538,11 @@ public:
|
||||
return buffer;
|
||||
}
|
||||
|
||||
bool IsPhysicalBuffer() const {
|
||||
return physical_address != nullptr;
|
||||
}
|
||||
|
||||
Node GetPhysicalAddress() const {
|
||||
return physical_address;
|
||||
}
|
||||
|
||||
private:
|
||||
Node physical_address{};
|
||||
Node buffer{};
|
||||
Tegra::Shader::Attribute::Index index{};
|
||||
u32 element{};
|
||||
const Tegra::Shader::IpaMode input_mode;
|
||||
const Node buffer;
|
||||
const Tegra::Shader::Attribute::Index index;
|
||||
const u32 element;
|
||||
};
|
||||
|
||||
/// Constant buffer node, usually mapped to uniform buffers in GLSL
|
||||
@@ -563,8 +616,11 @@ private:
|
||||
|
||||
class ShaderIR final {
|
||||
public:
|
||||
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset);
|
||||
~ShaderIR();
|
||||
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset)
|
||||
: program_code{program_code}, main_offset{main_offset} {
|
||||
|
||||
Decode();
|
||||
}
|
||||
|
||||
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
|
||||
return basic_blocks;
|
||||
@@ -578,7 +634,8 @@ public:
|
||||
return used_predicates;
|
||||
}
|
||||
|
||||
const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const {
|
||||
const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>&
|
||||
GetInputAttributes() const {
|
||||
return used_input_attributes;
|
||||
}
|
||||
|
||||
@@ -594,6 +651,10 @@ public:
|
||||
return used_samplers;
|
||||
}
|
||||
|
||||
const std::set<Image>& GetImages() const {
|
||||
return used_images;
|
||||
}
|
||||
|
||||
const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
|
||||
const {
|
||||
return used_clip_distances;
|
||||
@@ -607,10 +668,6 @@ public:
|
||||
return static_cast<std::size_t>(coverage_end * sizeof(u64));
|
||||
}
|
||||
|
||||
bool HasPhysicalAttributes() const {
|
||||
return uses_physical_attributes;
|
||||
}
|
||||
|
||||
const Tegra::Shader::Header& GetHeader() const {
|
||||
return header;
|
||||
}
|
||||
@@ -644,6 +701,7 @@ private:
|
||||
u32 DecodeConversion(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeMemory(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeTexture(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeImage(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
|
||||
@@ -663,7 +721,7 @@ private:
|
||||
/// Creates a conditional node
|
||||
Node Conditional(Node condition, std::vector<Node>&& code);
|
||||
/// Creates a commentary
|
||||
Node Comment(std::string text);
|
||||
Node Comment(const std::string& text);
|
||||
/// Creates an u32 immediate
|
||||
Node Immediate(u32 value);
|
||||
/// Creates a s32 immediate
|
||||
@@ -692,9 +750,8 @@ private:
|
||||
/// Generates a predicate node for an immediate true or false value
|
||||
Node GetPredicate(bool immediate);
|
||||
/// Generates a node representing an input attribute. Keeps track of used attributes.
|
||||
Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {});
|
||||
/// Generates a node representing a physical input attribute.
|
||||
Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {});
|
||||
Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element,
|
||||
const Tegra::Shader::IpaMode& input_mode, Node buffer = {});
|
||||
/// Generates a node representing an output attribute. Keeps track of used attributes.
|
||||
Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
|
||||
/// Generates a node representing an internal flag
|
||||
@@ -764,6 +821,12 @@ private:
|
||||
Tegra::Shader::TextureType type, bool is_array,
|
||||
bool is_shadow);
|
||||
|
||||
/// Accesses an image.
|
||||
const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
|
||||
|
||||
/// Access a bindless image sampler.
|
||||
const Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
|
||||
|
||||
/// Extracts a sequence of bits from a node
|
||||
Node BitfieldExtract(Node value, u32 offset, u32 bits);
|
||||
|
||||
@@ -787,6 +850,8 @@ private:
|
||||
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||
bool depth_compare, bool is_array, bool is_aoffi);
|
||||
|
||||
Node4 GetTldCode(Tegra::Shader::Instruction instr);
|
||||
|
||||
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||
bool is_array);
|
||||
|
||||
@@ -811,15 +876,16 @@ private:
|
||||
void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
|
||||
Node op_c, Node imm_lut, bool sets_cc);
|
||||
|
||||
Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
|
||||
Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
|
||||
|
||||
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
|
||||
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
|
||||
|
||||
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
|
||||
s64 cursor) const;
|
||||
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
|
||||
|
||||
std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(
|
||||
NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write);
|
||||
std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
|
||||
Node addr_register,
|
||||
u32 immediate_offset,
|
||||
bool is_write);
|
||||
|
||||
template <typename... T>
|
||||
Node Operation(OperationCode code, const T*... operands) {
|
||||
@@ -831,10 +897,12 @@ private:
|
||||
return StoreNode(OperationNode(code, std::move(meta), operands...));
|
||||
}
|
||||
|
||||
template <typename... T>
|
||||
Node Operation(OperationCode code, std::vector<Node>&& operands) {
|
||||
return StoreNode(OperationNode(code, std::move(operands)));
|
||||
}
|
||||
|
||||
template <typename... T>
|
||||
Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) {
|
||||
return StoreNode(OperationNode(code, std::move(meta), std::move(operands)));
|
||||
}
|
||||
@@ -866,13 +934,14 @@ private:
|
||||
|
||||
std::set<u32> used_registers;
|
||||
std::set<Tegra::Shader::Pred> used_predicates;
|
||||
std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
|
||||
std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>
|
||||
used_input_attributes;
|
||||
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
|
||||
std::map<u32, ConstBuffer> used_cbufs;
|
||||
std::set<Sampler> used_samplers;
|
||||
std::set<Image> used_images;
|
||||
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
|
||||
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
|
||||
bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
|
||||
|
||||
Tegra::Shader::Header header;
|
||||
};
|
||||
|
||||
@@ -17,24 +17,22 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
|
||||
for (; cursor >= 0; --cursor) {
|
||||
const Node node = code.at(cursor);
|
||||
if (const auto operation = std::get_if<OperationNode>(node)) {
|
||||
if (operation->GetCode() == operation_code) {
|
||||
if (operation->GetCode() == operation_code)
|
||||
return {node, cursor};
|
||||
}
|
||||
}
|
||||
if (const auto conditional = std::get_if<ConditionalNode>(node)) {
|
||||
const auto& conditional_code = conditional->GetCode();
|
||||
const auto [found, internal_cursor] = FindOperation(
|
||||
conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
|
||||
if (found) {
|
||||
if (found)
|
||||
return {found, cursor};
|
||||
}
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
} // namespace
|
||||
|
||||
Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const {
|
||||
Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
|
||||
if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
|
||||
// Cbuf found, but it has to be immediate
|
||||
return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
|
||||
@@ -67,7 +65,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
|
||||
std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
|
||||
// Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
|
||||
// that it uses as operand
|
||||
const auto [found, found_cursor] =
|
||||
@@ -82,7 +80,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code,
|
||||
}
|
||||
|
||||
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
|
||||
s64 cursor) const {
|
||||
s64 cursor) {
|
||||
for (; cursor >= 0; --cursor) {
|
||||
const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
|
||||
if (!found_node) {
|
||||
|
||||
@@ -12,6 +12,8 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
|
||||
switch (texture_type) {
|
||||
case Tegra::Texture::TextureType::Texture1D:
|
||||
return SurfaceTarget::Texture1D;
|
||||
case Tegra::Texture::TextureType::Texture1DBuffer:
|
||||
return SurfaceTarget::TextureBuffer;
|
||||
case Tegra::Texture::TextureType::Texture2D:
|
||||
case Tegra::Texture::TextureType::Texture2DNoMipmap:
|
||||
return SurfaceTarget::Texture2D;
|
||||
@@ -35,6 +37,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
|
||||
bool SurfaceTargetIsLayered(SurfaceTarget target) {
|
||||
switch (target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
case SurfaceTarget::TextureBuffer:
|
||||
case SurfaceTarget::Texture2D:
|
||||
case SurfaceTarget::Texture3D:
|
||||
return false;
|
||||
@@ -53,6 +56,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) {
|
||||
bool SurfaceTargetIsArray(SurfaceTarget target) {
|
||||
switch (target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
case SurfaceTarget::TextureBuffer:
|
||||
case SurfaceTarget::Texture2D:
|
||||
case SurfaceTarget::Texture3D:
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
|
||||
@@ -114,6 +114,7 @@ enum class SurfaceType {
|
||||
|
||||
enum class SurfaceTarget {
|
||||
Texture1D,
|
||||
TextureBuffer,
|
||||
Texture2D,
|
||||
Texture3D,
|
||||
Texture1DArray,
|
||||
|
||||
@@ -172,12 +172,16 @@ struct TICEntry {
|
||||
BitField<26, 1, u32> use_header_opt_control;
|
||||
BitField<27, 1, u32> depth_texture;
|
||||
BitField<28, 4, u32> max_mip_level;
|
||||
|
||||
BitField<0, 16, u32> buffer_high_width_minus_one;
|
||||
};
|
||||
union {
|
||||
BitField<0, 16, u32> width_minus_1;
|
||||
BitField<22, 1, u32> srgb_conversion;
|
||||
BitField<23, 4, TextureType> texture_type;
|
||||
BitField<29, 3, u32> border_size;
|
||||
|
||||
BitField<0, 16, u32> buffer_low_width_minus_one;
|
||||
};
|
||||
union {
|
||||
BitField<0, 16, u32> height_minus_1;
|
||||
@@ -206,7 +210,10 @@ struct TICEntry {
|
||||
}
|
||||
|
||||
u32 Width() const {
|
||||
return width_minus_1 + 1;
|
||||
if (header_version != TICHeaderVersion::OneDBuffer) {
|
||||
return width_minus_1 + 1;
|
||||
}
|
||||
return (buffer_high_width_minus_one << 16) | buffer_low_width_minus_one;
|
||||
}
|
||||
|
||||
u32 Height() const {
|
||||
@@ -240,6 +247,15 @@ struct TICEntry {
|
||||
header_version == TICHeaderVersion::BlockLinearColorKey;
|
||||
}
|
||||
|
||||
bool IsLineal() const {
|
||||
return header_version == TICHeaderVersion::Pitch ||
|
||||
header_version == TICHeaderVersion::PitchColorKey;
|
||||
}
|
||||
|
||||
bool IsBuffer() const {
|
||||
return header_version == TICHeaderVersion::OneDBuffer;
|
||||
}
|
||||
|
||||
bool IsSrgbConversionEnabled() const {
|
||||
return srgb_conversion != 0;
|
||||
}
|
||||
|
||||
@@ -82,6 +82,8 @@ add_executable(yuzu
|
||||
util/limitable_input_dialog.h
|
||||
util/sequence_dialog/sequence_dialog.cpp
|
||||
util/sequence_dialog/sequence_dialog.h
|
||||
util/spinbox.cpp
|
||||
util/spinbox.h
|
||||
util/util.cpp
|
||||
util/util.h
|
||||
compatdb.cpp
|
||||
|
||||
@@ -9,10 +9,10 @@
|
||||
|
||||
AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) {
|
||||
ui->setupUi(this);
|
||||
ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200));
|
||||
ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg(
|
||||
QString::fromUtf8(Common::g_build_fullname), QString::fromUtf8(Common::g_scm_branch),
|
||||
QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10)));
|
||||
ui->labelLogo->setPixmap(QIcon::fromTheme("yuzu").pixmap(200));
|
||||
ui->labelBuildInfo->setText(
|
||||
ui->labelBuildInfo->text().arg(Common::g_build_fullname, Common::g_scm_branch,
|
||||
Common::g_scm_desc, QString(Common::g_build_date).left(10)));
|
||||
}
|
||||
|
||||
AboutDialog::~AboutDialog() = default;
|
||||
|
||||
@@ -54,6 +54,6 @@ void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_te
|
||||
|
||||
void QtErrorDisplay::MainWindowFinishedError() {
|
||||
// Acquire the HLE mutex
|
||||
std::lock_guard lock{HLE::g_hle_lock};
|
||||
std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
|
||||
callback();
|
||||
}
|
||||
|
||||
@@ -84,10 +84,10 @@ QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent)
|
||||
tree_view->setContextMenuPolicy(Qt::NoContextMenu);
|
||||
|
||||
item_model->insertColumns(0, 1);
|
||||
item_model->setHeaderData(0, Qt::Horizontal, tr("Users"));
|
||||
item_model->setHeaderData(0, Qt::Horizontal, "Users");
|
||||
|
||||
// We must register all custom types with the Qt Automoc system so that we are able to use it
|
||||
// with signals/slots. In this case, QList falls under the umbrella of custom types.
|
||||
// with signals/slots. In this case, QList falls under the umbrells of custom types.
|
||||
qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>");
|
||||
|
||||
layout->setContentsMargins(0, 0, 0, 0);
|
||||
|
||||
@@ -188,9 +188,7 @@ private:
|
||||
GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread)
|
||||
: QWidget(parent), emu_thread(emu_thread) {
|
||||
setWindowTitle(QStringLiteral("yuzu %1 | %2-%3")
|
||||
.arg(QString::fromUtf8(Common::g_build_name),
|
||||
QString::fromUtf8(Common::g_scm_branch),
|
||||
QString::fromUtf8(Common::g_scm_desc)));
|
||||
.arg(Common::g_build_name, Common::g_scm_branch, Common::g_scm_desc));
|
||||
setAttribute(Qt::WA_AcceptTouchEvents);
|
||||
|
||||
InputCommon::Init();
|
||||
@@ -219,7 +217,7 @@ void GRenderWindow::SwapBuffers() {
|
||||
// However:
|
||||
// - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called
|
||||
// since the last time `swapBuffers` was executed;
|
||||
// - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks.
|
||||
// - On macOS, if `makeCurrent` isn't called explicitely, resizing the buffer breaks.
|
||||
context->makeCurrent(child);
|
||||
|
||||
context->swapBuffers(child);
|
||||
@@ -381,7 +379,6 @@ void GRenderWindow::InitRenderTarget() {
|
||||
fmt.setVersion(4, 3);
|
||||
if (Settings::values.use_compatibility_profile) {
|
||||
fmt.setProfile(QSurfaceFormat::CompatibilityProfile);
|
||||
fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
|
||||
} else {
|
||||
fmt.setProfile(QSurfaceFormat::CoreProfile);
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -9,6 +9,7 @@
|
||||
#include <string>
|
||||
#include <QVariant>
|
||||
#include "core/settings.h"
|
||||
#include "yuzu/ui_settings.h"
|
||||
|
||||
class QSettings;
|
||||
|
||||
@@ -36,51 +37,19 @@ private:
|
||||
void ReadTouchscreenValues();
|
||||
void ApplyDefaultProfileIfInputInvalid();
|
||||
|
||||
// Read functions bases off the respective config section names.
|
||||
void ReadAudioValues();
|
||||
void ReadControlValues();
|
||||
void ReadCoreValues();
|
||||
void ReadDataStorageValues();
|
||||
void ReadDebuggingValues();
|
||||
void ReadDisabledAddOnValues();
|
||||
void ReadMiscellaneousValues();
|
||||
void ReadPathValues();
|
||||
void ReadRendererValues();
|
||||
void ReadShortcutValues();
|
||||
void ReadSystemValues();
|
||||
void ReadUIValues();
|
||||
void ReadUIGamelistValues();
|
||||
void ReadUILayoutValues();
|
||||
void ReadWebServiceValues();
|
||||
|
||||
void SaveValues();
|
||||
void SavePlayerValues();
|
||||
void SaveDebugValues();
|
||||
void SaveMouseValues();
|
||||
void SaveTouchscreenValues();
|
||||
|
||||
// Save functions based off the respective config section names.
|
||||
void SaveAudioValues();
|
||||
void SaveControlValues();
|
||||
void SaveCoreValues();
|
||||
void SaveDataStorageValues();
|
||||
void SaveDebuggingValues();
|
||||
void SaveDisabledAddOnValues();
|
||||
void SaveMiscellaneousValues();
|
||||
void SavePathValues();
|
||||
void SaveRendererValues();
|
||||
void SaveShortcutValues();
|
||||
void SaveSystemValues();
|
||||
void SaveUIValues();
|
||||
void SaveUIGamelistValues();
|
||||
void SaveUILayoutValues();
|
||||
void SaveWebServiceValues();
|
||||
|
||||
QVariant ReadSetting(const QString& name) const;
|
||||
QVariant ReadSetting(const QString& name, const QVariant& default_value) const;
|
||||
void WriteSetting(const QString& name, const QVariant& value);
|
||||
void WriteSetting(const QString& name, const QVariant& value, const QVariant& default_value);
|
||||
|
||||
static const std::array<UISettings::Shortcut, 15> default_hotkeys;
|
||||
|
||||
std::unique_ptr<QSettings> qt_config;
|
||||
std::string qt_config_loc;
|
||||
};
|
||||
|
||||
@@ -16,21 +16,21 @@ ConfigureAudio::ConfigureAudio(QWidget* parent)
|
||||
ui->setupUi(this);
|
||||
|
||||
ui->output_sink_combo_box->clear();
|
||||
ui->output_sink_combo_box->addItem(QString::fromUtf8(AudioCore::auto_device_name));
|
||||
ui->output_sink_combo_box->addItem("auto");
|
||||
for (const char* id : AudioCore::GetSinkIDs()) {
|
||||
ui->output_sink_combo_box->addItem(QString::fromUtf8(id));
|
||||
ui->output_sink_combo_box->addItem(id);
|
||||
}
|
||||
|
||||
connect(ui->volume_slider, &QSlider::valueChanged, this,
|
||||
&ConfigureAudio::setVolumeIndicatorText);
|
||||
|
||||
this->setConfiguration();
|
||||
connect(ui->output_sink_combo_box, qOverload<int>(&QComboBox::currentIndexChanged), this,
|
||||
connect(ui->output_sink_combo_box,
|
||||
static_cast<void (QComboBox::*)(int)>(&QComboBox::currentIndexChanged), this,
|
||||
&ConfigureAudio::updateAudioDevices);
|
||||
|
||||
const bool is_powered_on = Core::System::GetInstance().IsPoweredOn();
|
||||
ui->output_sink_combo_box->setEnabled(!is_powered_on);
|
||||
ui->audio_device_combo_box->setEnabled(!is_powered_on);
|
||||
ui->output_sink_combo_box->setEnabled(!Core::System::GetInstance().IsPoweredOn());
|
||||
ui->audio_device_combo_box->setEnabled(!Core::System::GetInstance().IsPoweredOn());
|
||||
}
|
||||
|
||||
ConfigureAudio::~ConfigureAudio() = default;
|
||||
@@ -94,7 +94,7 @@ void ConfigureAudio::applyConfiguration() {
|
||||
|
||||
void ConfigureAudio::updateAudioDevices(int sink_index) {
|
||||
ui->audio_device_combo_box->clear();
|
||||
ui->audio_device_combo_box->addItem(QString::fromUtf8(AudioCore::auto_device_name));
|
||||
ui->audio_device_combo_box->addItem(AudioCore::auto_device_name);
|
||||
|
||||
const std::string sink_id = ui->output_sink_combo_box->itemText(sink_index).toStdString();
|
||||
for (const auto& device : AudioCore::GetDeviceListForSink(sink_id)) {
|
||||
|
||||
@@ -100,15 +100,13 @@ void ConfigureGameList::RetranslateUI() {
|
||||
|
||||
void ConfigureGameList::InitializeIconSizeComboBox() {
|
||||
for (const auto& size : default_icon_sizes) {
|
||||
ui->icon_size_combobox->addItem(QString::fromUtf8(size.second), size.first);
|
||||
ui->icon_size_combobox->addItem(size.second, size.first);
|
||||
}
|
||||
}
|
||||
|
||||
void ConfigureGameList::InitializeRowComboBoxes() {
|
||||
for (std::size_t i = 0; i < row_text_names.size(); ++i) {
|
||||
const QString row_text_name = QString::fromUtf8(row_text_names[i]);
|
||||
|
||||
ui->row_1_text_combobox->addItem(row_text_name, QVariant::fromValue(i));
|
||||
ui->row_2_text_combobox->addItem(row_text_name, QVariant::fromValue(i));
|
||||
ui->row_1_text_combobox->addItem(row_text_names[i], QVariant::fromValue(i));
|
||||
ui->row_2_text_combobox->addItem(row_text_names[i], QVariant::fromValue(i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,8 +14,7 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
|
||||
ui->setupUi(this);
|
||||
|
||||
for (const auto& theme : UISettings::themes) {
|
||||
ui->theme_combobox->addItem(QString::fromUtf8(theme.first),
|
||||
QString::fromUtf8(theme.second));
|
||||
ui->theme_combobox->addItem(theme.first, theme.second);
|
||||
}
|
||||
|
||||
this->setConfiguration();
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user