Compare commits

..

9 Commits

Author SHA1 Message Date
ReinUsesLisp
9a8c1745f1 gl_shader_decompiler: Implement image binding settings 2019-05-16 20:03:51 -03:00
ReinUsesLisp
f96d50165f shader: Implement bindless images 2019-05-16 20:03:51 -03:00
ReinUsesLisp
f9f541470e shader: Decode SUST and implement backing image functionality 2019-05-16 20:03:51 -03:00
ReinUsesLisp
ce691745dc gl_rasterizer: Track texture buffer usage 2019-05-16 20:03:51 -03:00
ReinUsesLisp
1d59af8f7c video_core: Make ARB_buffer_storage a required extension 2019-05-16 20:03:50 -03:00
ReinUsesLisp
a6252257eb gl_rasterizer_cache: Use texture buffers to emulate texture buffers 2019-05-16 20:03:50 -03:00
ReinUsesLisp
dc5e5ac3b0 maxwell_3d: Partially implement texture buffers as 1D textures 2019-05-16 18:55:20 -03:00
ReinUsesLisp
4f612052b2 gl_shader_decompiler: Allow 1D textures to be texture buffers 2019-05-16 18:55:20 -03:00
ReinUsesLisp
89eef17670 shader: Implement texture buffers 2019-05-16 18:55:20 -03:00
123 changed files with 2859 additions and 5247 deletions

View File

@@ -132,7 +132,7 @@ find_package(Threads REQUIRED)
if (ENABLE_SDL2)
if (YUZU_USE_BUNDLED_SDL2)
# Detect toolchain and platform
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
set(SDL2_VER "SDL2-2.0.8")
else()
message(FATAL_ERROR "No bundled SDL2 binaries for your toolchain. Disable YUZU_USE_BUNDLED_SDL2 and provide your own.")
@@ -165,7 +165,7 @@ if (YUZU_USE_BUNDLED_UNICORN)
if (MSVC)
message(STATUS "unicorn not found, falling back to bundled")
# Detect toolchain and platform
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
set(UNICORN_VER "unicorn-yuzu")
else()
message(FATAL_ERROR "No bundled Unicorn binaries for your toolchain. Disable YUZU_USE_BUNDLED_UNICORN and provide your own.")
@@ -233,7 +233,7 @@ endif()
if (ENABLE_QT)
if (YUZU_USE_BUNDLED_QT)
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
set(QT_VER qt-5.12.0-msvc2017_64)
else()
message(FATAL_ERROR "No bundled Qt binaries for your toolchain. Disable YUZU_USE_BUNDLED_QT and provide your own.")

View File

@@ -70,6 +70,7 @@ set(HASH_FILES
"${VIDEO_CORE}/shader/decode/half_set.cpp"
"${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/hfma2.cpp"
"${VIDEO_CORE}/shader/decode/image.cpp"
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"

View File

@@ -90,20 +90,12 @@
* int arg2) KHRONOS_APIATTRIBUTES;
*/
#if defined(__SCITECH_SNAP__) && !defined(KHRONOS_STATIC)
# define KHRONOS_STATIC 1
#endif
/*-------------------------------------------------------------------------
* Definition of KHRONOS_APICALL
*-------------------------------------------------------------------------
* This precedes the return type of the function in the function prototype.
*/
#if defined(KHRONOS_STATIC)
/* If the preprocessor constant KHRONOS_STATIC is defined, make the
* header compatible with static linking. */
# define KHRONOS_APICALL
#elif defined(_WIN32)
#if defined(_WIN32) && !defined(__SCITECH_SNAP__)
# define KHRONOS_APICALL __declspec(dllimport)
#elif defined (__SYMBIAN32__)
# define KHRONOS_APICALL IMPORT_C
@@ -119,7 +111,7 @@
* This follows the return type of the function and precedes the function
* name in the function prototype.
*/
#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(KHRONOS_STATIC)
#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__)
/* Win32 but not WinCE */
# define KHRONOS_APIENTRY __stdcall
#else

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -44,6 +44,7 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/decode/half_set.cpp"
"${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/hfma2.cpp"
"${VIDEO_CORE}/shader/decode/image.cpp"
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"

View File

@@ -438,7 +438,7 @@ inline float RequestParser::Pop() {
template <>
inline double RequestParser::Pop() {
const u64 value = Pop<u64>();
double real;
float real;
std::memcpy(&real, &value, sizeof(real));
return real;
}

View File

@@ -43,7 +43,7 @@ void SessionRequestHandler::ClientDisconnected(const SharedPtr<ServerSession>& s
}
SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
const std::string& reason, u64 timeout, WakeupCallback&& callback,
SharedPtr<Thread> thread, const std::string& reason, u64 timeout, WakeupCallback&& callback,
SharedPtr<WritableEvent> writable_event) {
// Put the client thread to sleep until the wait event is signaled or the timeout expires.
thread->SetWakeupCallback([context = *this, callback](
@@ -58,7 +58,7 @@ SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
auto& kernel = Core::System::GetInstance().Kernel();
if (!writable_event) {
// Create event if not provided
const auto pair = WritableEvent::CreateEventPair(kernel, ResetType::Automatic,
const auto pair = WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"HLE Pause Event: " + reason);
writable_event = pair.writable;
}
@@ -76,9 +76,8 @@ SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
return writable_event;
}
HLERequestContext::HLERequestContext(SharedPtr<Kernel::ServerSession> server_session,
SharedPtr<Thread> thread)
: server_session(std::move(server_session)), thread(std::move(thread)) {
HLERequestContext::HLERequestContext(SharedPtr<Kernel::ServerSession> server_session)
: server_session(std::move(server_session)) {
cmd_buf[0] = 0;
}

View File

@@ -97,7 +97,7 @@ protected:
*/
class HLERequestContext {
public:
explicit HLERequestContext(SharedPtr<ServerSession> session, SharedPtr<Thread> thread);
explicit HLERequestContext(SharedPtr<ServerSession> session);
~HLERequestContext();
/// Returns a pointer to the IPC command buffer for this request.
@@ -119,6 +119,7 @@ public:
/**
* Puts the specified guest thread to sleep until the returned event is signaled or until the
* specified timeout expires.
* @param thread Thread to be put to sleep.
* @param reason Reason for pausing the thread, to be used for debugging purposes.
* @param timeout Timeout in nanoseconds after which the thread will be awoken and the callback
* invoked with a Timeout reason.
@@ -129,8 +130,8 @@ public:
* created.
* @returns Event that when signaled will resume the thread and call the callback function.
*/
SharedPtr<WritableEvent> SleepClientThread(const std::string& reason, u64 timeout,
WakeupCallback&& callback,
SharedPtr<WritableEvent> SleepClientThread(SharedPtr<Thread> thread, const std::string& reason,
u64 timeout, WakeupCallback&& callback,
SharedPtr<WritableEvent> writable_event = nullptr);
/// Populates this context with data from the requesting process/thread.
@@ -267,7 +268,6 @@ private:
std::array<u32, IPC::COMMAND_BUFFER_LENGTH> cmd_buf;
SharedPtr<Kernel::ServerSession> server_session;
SharedPtr<Thread> thread;
// TODO(yuriks): Check common usage of this and optimize size accordingly
boost::container::small_vector<SharedPtr<Object>, 8> move_objects;
boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;

View File

@@ -33,8 +33,8 @@ enum class HandleType : u32 {
};
enum class ResetType {
Automatic, ///< Reset automatically on object acquisition
Manual, ///< Never reset automatically
OneShot, ///< Reset automatically on object acquisition
Sticky, ///< Never reset automatically
};
class Object : NonCopyable {

View File

@@ -21,9 +21,8 @@ bool ReadableEvent::ShouldWait(const Thread* thread) const {
void ReadableEvent::Acquire(Thread* thread) {
ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
if (reset_type == ResetType::Automatic) {
if (reset_type == ResetType::OneShot)
signaled = false;
}
}
void ReadableEvent::Signal() {

View File

@@ -130,7 +130,7 @@ ResultCode ServerSession::HandleSyncRequest(SharedPtr<Thread> thread) {
// The ServerSession received a sync request, this means that there's new data available
// from its ClientSession, so wake up any threads that may be waiting on a svcReplyAndReceive or
// similar.
Kernel::HLERequestContext context(this, thread);
Kernel::HLERequestContext context(this);
u32* cmd_buf = (u32*)Memory::GetPointer(thread->GetTLSAddress());
context.PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf);

View File

@@ -1255,8 +1255,8 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
return vm_manager.MapCodeMemory(dst_address, src_address, size);
}
static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle,
u64 dst_address, u64 src_address, u64 size) {
ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address,
u64 src_address, u64 size) {
LOG_DEBUG(Kernel_SVC,
"called. process_handle=0x{:08X}, dst_address=0x{:016X}, src_address=0x{:016X}, "
"size=0x{:016X}",
@@ -1342,7 +1342,7 @@ static void ExitProcess(Core::System& system) {
/// Creates a new thread
static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr entry_point, u64 arg,
VAddr stack_top, u32 priority, s32 processor_id) {
LOG_DEBUG(Kernel_SVC,
LOG_TRACE(Kernel_SVC,
"called entrypoint=0x{:08X}, arg=0x{:08X}, stacktop=0x{:08X}, "
"threadpriority=0x{:08X}, processorid=0x{:08X} : created handle=0x{:08X}",
entry_point, arg, stack_top, priority, processor_id, *out_handle);
@@ -1402,7 +1402,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
/// Starts the thread for the provided handle
static ResultCode StartThread(Core::System& system, Handle thread_handle) {
LOG_DEBUG(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
@@ -1425,7 +1425,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {
/// Called when a thread exits
static void ExitThread(Core::System& system) {
LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->Stop();
@@ -1435,7 +1435,7 @@ static void ExitThread(Core::System& system) {
/// Sleep the current thread
static void SleepThread(Core::System& system, s64 nanoseconds) {
LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds);
LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
enum class SleepType : s64 {
YieldWithoutLoadBalancing = 0,
@@ -1880,51 +1880,11 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,
}
static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core,
u64 affinity_mask) {
LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, core=0x{:X}, affinity_mask=0x{:016X}",
thread_handle, core, affinity_mask);
u64 mask) {
LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:016X}, core=0x{:X}", thread_handle,
mask, core);
const auto* const current_process = system.Kernel().CurrentProcess();
if (core == static_cast<u32>(THREADPROCESSORID_IDEAL)) {
const u8 ideal_cpu_core = current_process->GetIdealCore();
ASSERT(ideal_cpu_core != static_cast<u8>(THREADPROCESSORID_IDEAL));
// Set the target CPU to the ideal core specified by the process.
core = ideal_cpu_core;
affinity_mask = 1ULL << core;
} else {
const u64 core_mask = current_process->GetCoreMask();
if ((core_mask | affinity_mask) != core_mask) {
LOG_ERROR(
Kernel_SVC,
"Invalid processor ID specified (core_mask=0x{:08X}, affinity_mask=0x{:016X})",
core_mask, affinity_mask);
return ERR_INVALID_PROCESSOR_ID;
}
if (affinity_mask == 0) {
LOG_ERROR(Kernel_SVC, "Specfified affinity mask is zero.");
return ERR_INVALID_COMBINATION;
}
if (core < Core::NUM_CPU_CORES) {
if ((affinity_mask & (1ULL << core)) == 0) {
LOG_ERROR(Kernel_SVC,
"Core is not enabled for the current mask, core={}, mask={:016X}", core,
affinity_mask);
return ERR_INVALID_COMBINATION;
}
} else if (core != static_cast<u32>(THREADPROCESSORID_DONT_CARE) &&
core != static_cast<u32>(THREADPROCESSORID_DONT_UPDATE)) {
LOG_ERROR(Kernel_SVC, "Invalid processor ID specified (core={}).", core);
return ERR_INVALID_PROCESSOR_ID;
}
}
const auto& handle_table = current_process->GetHandleTable();
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
if (!thread) {
LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -1932,7 +1892,40 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
return ERR_INVALID_HANDLE;
}
thread->ChangeCore(core, affinity_mask);
if (core == static_cast<u32>(THREADPROCESSORID_IDEAL)) {
const u8 ideal_cpu_core = thread->GetOwnerProcess()->GetIdealCore();
ASSERT(ideal_cpu_core != static_cast<u8>(THREADPROCESSORID_IDEAL));
// Set the target CPU to the ideal core specified by the process.
core = ideal_cpu_core;
mask = 1ULL << core;
}
if (mask == 0) {
LOG_ERROR(Kernel_SVC, "Mask is 0");
return ERR_INVALID_COMBINATION;
}
/// This value is used to only change the affinity mask without changing the current ideal core.
static constexpr u32 OnlyChangeMask = static_cast<u32>(-3);
if (core == OnlyChangeMask) {
core = thread->GetIdealCore();
} else if (core >= Core::NUM_CPU_CORES && core != static_cast<u32>(-1)) {
LOG_ERROR(Kernel_SVC, "Invalid core specified, got {}", core);
return ERR_INVALID_PROCESSOR_ID;
}
// Error out if the input core isn't enabled in the input mask.
if (core < Core::NUM_CPU_CORES && (mask & (1ull << core)) == 0) {
LOG_ERROR(Kernel_SVC, "Core is not enabled for the current mask, core={}, mask={:016X}",
core, mask);
return ERR_INVALID_COMBINATION;
}
thread->ChangeCore(core, mask);
return RESULT_SUCCESS;
}
@@ -1987,7 +1980,7 @@ static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle
auto& kernel = system.Kernel();
const auto [readable_event, writable_event] =
WritableEvent::CreateEventPair(kernel, ResetType::Manual, "CreateEvent");
WritableEvent::CreateEventPair(kernel, ResetType::Sticky, "CreateEvent");
HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable();
@@ -2190,8 +2183,8 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
return RESULT_SUCCESS;
}
static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids,
u32 out_thread_ids_size, Handle debug_handle) {
ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids,
u32 out_thread_ids_size, Handle debug_handle) {
// TODO: Handle this case when debug events are supported.
UNIMPLEMENTED_IF(debug_handle != InvalidHandle);

View File

@@ -30,21 +30,12 @@ enum ThreadPriority : u32 {
};
enum ThreadProcessorId : s32 {
/// Indicates that no particular processor core is preferred.
THREADPROCESSORID_DONT_CARE = -1,
/// Run thread on the ideal core specified by the process.
THREADPROCESSORID_IDEAL = -2,
/// Indicates that the preferred processor ID shouldn't be updated in
/// a core mask setting operation.
THREADPROCESSORID_DONT_UPDATE = -3,
THREADPROCESSORID_0 = 0, ///< Run thread on core 0
THREADPROCESSORID_1 = 1, ///< Run thread on core 1
THREADPROCESSORID_2 = 2, ///< Run thread on core 2
THREADPROCESSORID_3 = 3, ///< Run thread on core 3
THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this
THREADPROCESSORID_IDEAL = -2, ///< Run thread on the ideal core specified by the process.
THREADPROCESSORID_0 = 0, ///< Run thread on core 0
THREADPROCESSORID_1 = 1, ///< Run thread on core 1
THREADPROCESSORID_2 = 2, ///< Run thread on core 2
THREADPROCESSORID_3 = 3, ///< Run thread on core 3
THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this
/// Allowed CPU mask
THREADPROCESSORID_DEFAULT_MASK = (1 << THREADPROCESSORID_0) | (1 << THREADPROCESSORID_1) |

View File

@@ -276,7 +276,7 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"ISelfController:LaunchableEvent");
}
@@ -442,10 +442,10 @@ void ISelfController::GetIdleTimeDetectionExtension(Kernel::HLERequestContext& c
AppletMessageQueue::AppletMessageQueue() {
auto& kernel = Core::System::GetInstance().Kernel();
on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"AMMessageQueue:OnMessageRecieved");
on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "AMMessageQueue:OperationModeChanged");
kernel, Kernel::ResetType::OneShot, "AMMessageQueue:OperationModeChanged");
}
AppletMessageQueue::~AppletMessageQueue() = default;
@@ -835,7 +835,6 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
return;
}
std::memcpy(backing.buffer.data() + offset, data.data(), data.size());
@@ -858,7 +857,6 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
return;
}
ctx.WriteBuffer(backing.buffer.data() + offset, size);

View File

@@ -26,11 +26,11 @@ namespace Service::AM::Applets {
AppletDataBroker::AppletDataBroker() {
auto& kernel = Core::System::GetInstance().Kernel();
state_changed_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent");
kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:StateChangedEvent");
pop_out_data_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopDataOutEvent");
kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:PopDataOutEvent");
pop_interactive_out_data_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
}
AppletDataBroker::~AppletDataBroker() = default;

View File

@@ -68,7 +68,7 @@ AOC_U::AOC_U() : ServiceFramework("aoc:u"), add_on_content(AccumulateAOCTitleIDs
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
aoc_change_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
aoc_change_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"GetAddOnContentListChanged:Event");
}

View File

@@ -67,7 +67,7 @@ public:
// This is the event handle used to check if the audio buffer was released
auto& system = Core::System::GetInstance();
buffer_event = Kernel::WritableEvent::CreateEventPair(
system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased");
system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
audio_params.channel_count, std::move(unique_name),

View File

@@ -8,7 +8,6 @@
#include "audio_core/audio_renderer.h"
#include "common/alignment.h"
#include "common/bit_util.h"
#include "common/common_funcs.h"
#include "common/logging/log.h"
#include "common/string_util.h"
@@ -47,7 +46,7 @@ public:
auto& system = Core::System::GetInstance();
system_event = Kernel::WritableEvent::CreateEventPair(
system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent");
system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
system_event.writable);
}
@@ -179,7 +178,7 @@ public:
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"IAudioOutBufferReleasedEvent");
}
@@ -263,304 +262,64 @@ void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
OpenAudioRendererImpl(ctx);
}
static u64 CalculateNumPerformanceEntries(const AudioCore::AudioRendererParameter& params) {
// +1 represents the final mix.
return u64{params.effect_count} + params.submix_count + params.sink_count + params.voice_count +
1;
}
void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
LOG_DEBUG(Service_Audio, "called");
// Several calculations below align the sizes being calculated
// onto a 64 byte boundary.
static constexpr u64 buffer_alignment_size = 64;
u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
buffer_sz += params.submix_count * 1024;
buffer_sz += 0x940 * (params.submix_count + 1);
buffer_sz += 0x3F0 * params.voice_count;
buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
buffer_sz += Common::AlignUp(
(0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
(params.mix_buffer_count + 6),
0x40);
// Some calculations that calculate portions of the buffer
// that will contain information, on the other hand, align
// the result of some of their calcularions on a 16 byte boundary.
static constexpr u64 info_field_alignment_size = 16;
// Maximum detail entries that may exist at one time for performance
// frame statistics.
static constexpr u64 max_perf_detail_entries = 100;
// Size of the data structure representing the bulk of the voice-related state.
static constexpr u64 voice_state_size = 0x100;
// Size of the upsampler manager data structure
constexpr u64 upsampler_manager_size = 0x48;
// Calculates the part of the size that relates to mix buffers.
const auto calculate_mix_buffer_sizes = [](const AudioCore::AudioRendererParameter& params) {
// As of 8.0.0 this is the maximum on voice channels.
constexpr u64 max_voice_channels = 6;
// The service expects the sample_count member of the parameters to either be
// a value of 160 or 240, so the maximum sample count is assumed in order
// to adequately handle all values at runtime.
constexpr u64 default_max_sample_count = 240;
const u64 total_mix_buffers = params.mix_buffer_count + max_voice_channels;
u64 size = 0;
size += total_mix_buffers * (sizeof(s32) * params.sample_count);
size += total_mix_buffers * (sizeof(s32) * default_max_sample_count);
size += u64{params.submix_count} + params.sink_count;
size = Common::AlignUp(size, buffer_alignment_size);
size += Common::AlignUp(params.unknown_30, buffer_alignment_size);
size += Common::AlignUp(sizeof(s32) * params.mix_buffer_count, buffer_alignment_size);
return size;
};
// Calculates the portion of the size related to the mix data (and the sorting thereof).
const auto calculate_mix_info_size = [this](const AudioCore::AudioRendererParameter& params) {
// The size of the mixing info data structure.
constexpr u64 mix_info_size = 0x940;
// Consists of total submixes with the final mix included.
const u64 total_mix_count = u64{params.submix_count} + 1;
// The total number of effects that may be available to the audio renderer at any time.
constexpr u64 max_effects = 256;
// Calculates the part of the size related to the audio node state.
// This will only be used if the audio revision supports the splitter.
const auto calculate_node_state_size = [](std::size_t num_nodes) {
// Internally within a nodestate, it appears to use a data structure
// similar to a std::bitset<64> twice.
constexpr u64 bit_size = Common::BitSize<u64>();
constexpr u64 num_bitsets = 2;
// Node state instances have three states internally for performing
// depth-first searches of nodes. Initialized, Found, and Done Sorting.
constexpr u64 num_states = 3;
u64 size = 0;
size += (num_nodes * num_nodes) * sizeof(s32);
size += num_states * (num_nodes * sizeof(s32));
size += num_bitsets * (Common::AlignUp(num_nodes, bit_size) / Common::BitSize<u8>());
return size;
};
// Calculates the part of the size related to the adjacency (aka edge) matrix.
const auto calculate_edge_matrix_size = [](std::size_t num_nodes) {
return (num_nodes * num_nodes) * sizeof(s32);
};
u64 size = 0;
size += Common::AlignUp(sizeof(void*) * total_mix_count, info_field_alignment_size);
size += Common::AlignUp(mix_info_size * total_mix_count, info_field_alignment_size);
size += Common::AlignUp(sizeof(s32) * max_effects * params.submix_count,
info_field_alignment_size);
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
size += Common::AlignUp(calculate_node_state_size(total_mix_count) +
calculate_edge_matrix_size(total_mix_count),
info_field_alignment_size);
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
const u32 count = params.submix_count + 1;
u64 node_count = Common::AlignUp(count, 0x40);
const u64 node_state_buffer_sz =
4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
u64 edge_matrix_buffer_sz = 0;
node_count = Common::AlignUp(count * count, 0x40);
if (node_count >> 31 != 0) {
edge_matrix_buffer_sz = (node_count | 7) / 8;
} else {
edge_matrix_buffer_sz = node_count / 8;
}
buffer_sz += Common::AlignUp(node_state_buffer_sz + edge_matrix_buffer_sz, 0x10);
}
return size;
};
buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
buffer_sz += 0xE0 * params.num_splitter_send_channels;
buffer_sz += 0x20 * params.splitter_count;
buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
}
buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
((params.voice_count * 256) | 0x40);
// Calculates the part of the size related to voice channel info.
const auto calculate_voice_info_size = [](const AudioCore::AudioRendererParameter& params) {
constexpr u64 voice_info_size = 0x220;
constexpr u64 voice_resource_size = 0xD0;
u64 size = 0;
size += Common::AlignUp(sizeof(void*) * params.voice_count, info_field_alignment_size);
size += Common::AlignUp(voice_info_size * params.voice_count, info_field_alignment_size);
size +=
Common::AlignUp(voice_resource_size * params.voice_count, info_field_alignment_size);
size += Common::AlignUp(voice_state_size * params.voice_count, info_field_alignment_size);
return size;
};
// Calculates the part of the size related to memory pools.
const auto calculate_memory_pools_size = [](const AudioCore::AudioRendererParameter& params) {
const u64 num_memory_pools = sizeof(s32) * (u64{params.effect_count} + params.voice_count);
const u64 memory_pool_info_size = 0x20;
return Common::AlignUp(num_memory_pools * memory_pool_info_size, info_field_alignment_size);
};
// Calculates the part of the size related to the splitter context.
const auto calculate_splitter_context_size =
[this](const AudioCore::AudioRendererParameter& params) -> u64 {
if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
return 0;
}
constexpr u64 splitter_info_size = 0x20;
constexpr u64 splitter_destination_data_size = 0xE0;
u64 size = 0;
size += params.num_splitter_send_channels;
size +=
Common::AlignUp(splitter_info_size * params.splitter_count, info_field_alignment_size);
size += Common::AlignUp(splitter_destination_data_size * params.num_splitter_send_channels,
info_field_alignment_size);
return size;
};
// Calculates the part of the size related to the upsampler info.
const auto calculate_upsampler_info_size = [](const AudioCore::AudioRendererParameter& params) {
constexpr u64 upsampler_info_size = 0x280;
// Yes, using the buffer size over info alignment size is intentional here.
return Common::AlignUp(upsampler_info_size * (u64{params.submix_count} + params.sink_count),
buffer_alignment_size);
};
// Calculates the part of the size related to effect info.
const auto calculate_effect_info_size = [](const AudioCore::AudioRendererParameter& params) {
constexpr u64 effect_info_size = 0x2B0;
return Common::AlignUp(effect_info_size * params.effect_count, info_field_alignment_size);
};
// Calculates the part of the size related to audio sink info.
const auto calculate_sink_info_size = [](const AudioCore::AudioRendererParameter& params) {
const u64 sink_info_size = 0x170;
return Common::AlignUp(sink_info_size * params.sink_count, info_field_alignment_size);
};
// Calculates the part of the size related to voice state info.
const auto calculate_voice_state_size = [](const AudioCore::AudioRendererParameter& params) {
const u64 voice_state_size = 0x100;
const u64 additional_size = buffer_alignment_size - 1;
return Common::AlignUp(voice_state_size * params.voice_count + additional_size,
info_field_alignment_size);
};
// Calculates the part of the size related to performance statistics.
const auto calculate_perf_size = [this](const AudioCore::AudioRendererParameter& params) {
// Extra size value appended to the end of the calculation.
constexpr u64 appended = 128;
// Whether or not we assume the newer version of performance metrics data structures.
const bool is_v2 =
IsFeatureSupported(AudioFeatures::PerformanceMetricsVersion2, params.revision);
// Data structure sizes
constexpr u64 perf_statistics_size = 0x0C;
const u64 header_size = is_v2 ? 0x30 : 0x18;
const u64 entry_size = is_v2 ? 0x18 : 0x10;
const u64 detail_size = is_v2 ? 0x18 : 0x10;
const u64 entry_count = CalculateNumPerformanceEntries(params);
const u64 size_per_frame =
header_size + (entry_size * entry_count) + (detail_size * max_perf_detail_entries);
u64 size = 0;
size += Common::AlignUp(size_per_frame * params.performance_frame_count + 1,
buffer_alignment_size);
size += Common::AlignUp(perf_statistics_size, buffer_alignment_size);
size += appended;
return size;
};
// Computes how many bytes of the work buffer are reserved for the audio command buffer.
const auto calculate_command_buffer_size =
    [this](const AudioCore::AudioRendererParameter& params) {
        // Padding added on top of the command storage in both the fixed and variadic cases.
        constexpr u64 alignment = (buffer_alignment_size - 1) * 2;

        // Revisions without variadic command buffers always get a fixed-size buffer.
        const bool has_variadic_commands =
            IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision);
        if (!has_variadic_commands) {
            constexpr u64 fixed_buffer_size = 0x18000;
            return fixed_buffer_size + alignment;
        }

        // With variadic command buffers the command generator emits commands of varying
        // size, so the buffer is sized for the worst case: take the largest possible size
        // of each command kind and scale it by the object counts in the parameters.

        // Per-object limits.
        constexpr u64 biquad_filters_per_voice = 2;
        constexpr u64 mix_buffer_limit = 24;

        // Sizes of the individual commands.
        constexpr u64 biquad_filter_cmd = 0x2C;
        constexpr u64 depop_mix_cmd = 0x24;
        constexpr u64 depop_setup_cmd = 0x50;
        constexpr u64 effect_cmd_max = 0x540;
        constexpr u64 mix_cmd = 0x1C;
        constexpr u64 mix_ramp_cmd = 0x24;
        constexpr u64 mix_ramp_grouped_cmd = 0x13C;
        constexpr u64 perf_cmd = 0x28;
        constexpr u64 sink_cmd = 0x130;
        constexpr u64 volume_cmd = 0x1C;
        constexpr u64 volume_ramp_cmd = 0x20;
        constexpr u64 voice_data_cmd = 0x9C;

        // Worst-case sizes of the composite command groups.
        constexpr u64 submix_cmd_max =
            depop_mix_cmd + (mix_cmd * mix_buffer_limit) * mix_buffer_limit;
        constexpr u64 voice_biquad_cmd = biquad_filter_cmd * biquad_filters_per_voice;
        constexpr u64 per_voice_fixed_cmds =
            voice_data_cmd + voice_biquad_cmd + volume_ramp_cmd + mix_ramp_grouped_cmd;
        const u64 voice_cmd_max =
            (params.splitter_count * depop_setup_cmd) + per_voice_fixed_cmds;
        const u64 perf_entry_count =
            CalculateNumPerformanceEntries(params) + max_perf_detail_entries;

        // Accumulate every contribution, starting from the padding.
        u64 total = alignment;
        total += params.effect_count * effect_cmd_max;                                // effects
        total += depop_mix_cmd + volume_cmd * mix_buffer_limit;                       // final mix
        total += perf_cmd * perf_entry_count;                                         // performance
        total += params.sink_count * sink_cmd;                                        // sinks
        total += params.num_splitter_send_channels * mix_buffer_limit * mix_ramp_cmd; // splitters
        total += params.submix_count * submix_cmd_max;                                // submixes
        total += params.voice_count * voice_cmd_max;                                  // voices
        return total;
    };
IPC::RequestParser rp{ctx};
const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
u64 size = 0;
size += calculate_mix_buffer_sizes(params);
size += calculate_mix_info_size(params);
size += calculate_voice_info_size(params);
size += upsampler_manager_size;
size += calculate_memory_pools_size(params);
size += calculate_splitter_context_size(params);
size = Common::AlignUp(size, buffer_alignment_size);
size += calculate_upsampler_info_size(params);
size += calculate_effect_info_size(params);
size += calculate_sink_info_size(params);
size += calculate_voice_state_size(params);
size += calculate_perf_size(params);
size += calculate_command_buffer_size(params);
// finally, 4KB page align the size, and we're done.
size = Common::AlignUp(size, 4096);
if (params.performance_frame_count >= 1) {
output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
16 * params.voice_count + 16) +
0x658) *
(params.performance_frame_count + 1) +
0xc0,
0x40) +
output_sz;
}
output_sz = Common::AlignUp(output_sz + 0x1807e, 0x1000);
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push<u64>(size);
LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", size);
rb.Push(RESULT_SUCCESS);
rb.Push<u64>(output_sz);
LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz);
}
void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
@@ -598,15 +357,10 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
}
bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
// Byte swap
const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0');
u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
switch (feature) {
case AudioFeatures::Splitter:
return version_num >= 2U;
case AudioFeatures::PerformanceMetricsVersion2:
case AudioFeatures::VariadicCommandBuffer:
return version_num >= 5U;
return version_num >= 2u;
default:
return false;
}

View File

@@ -28,8 +28,6 @@ private:
// Audio renderer capabilities whose availability depends on the revision supplied by the
// client; queried through IsFeatureSupported().
enum class AudioFeatures : u32 {
Splitter,
PerformanceMetricsVersion2,
// Commands issued by the command generator may vary in size.
VariadicCommandBuffer,
};
bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const;

View File

@@ -34,8 +34,8 @@ public:
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
register_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "BT:RegisterEvent");
register_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"BT:RegisterEvent");
}
private:

View File

@@ -57,13 +57,13 @@ public:
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
scan_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
scan_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"IBtmUserCore:ScanEvent");
connection_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IBtmUserCore:ConnectionEvent");
kernel, Kernel::ResetType::OneShot, "IBtmUserCore:ConnectionEvent");
service_discovery = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IBtmUserCore:Discovery");
config_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
kernel, Kernel::ResetType::OneShot, "IBtmUserCore:Discovery");
config_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"IBtmUserCore:ConfigEvent");
}

View File

@@ -170,7 +170,7 @@ void Controller_NPad::InitNewlyAddedControler(std::size_t controller_idx) {
void Controller_NPad::OnInit() {
auto& kernel = Core::System::GetInstance().Kernel();
styleset_changed_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "npad:NpadStyleSetChanged");
kernel, Kernel::ResetType::OneShot, "npad:NpadStyleSetChanged");
if (!IsControllerActivated()) {
return;

View File

@@ -26,7 +26,7 @@ constexpr ResultCode ERR_NO_APPLICATION_AREA(ErrorModule::NFP, 152);
Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
: ServiceFramework(name), module(std::move(module)) {
auto& kernel = Core::System::GetInstance().Kernel();
nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"IUser:NFCTagDetected");
}
@@ -67,9 +67,9 @@ public:
auto& kernel = Core::System::GetInstance().Kernel();
deactivate_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IUser:DeactivateEvent");
kernel, Kernel::ResetType::OneShot, "IUser:DeactivateEvent");
availability_change_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IUser:AvailabilityChangeEvent");
kernel, Kernel::ResetType::OneShot, "IUser:AvailabilityChangeEvent");
}
private:

View File

@@ -62,9 +62,9 @@ public:
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
event1 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
event1 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"IRequest:Event1");
event2 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
event2 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"IRequest:Event2");
}

View File

@@ -141,7 +141,7 @@ public:
auto& kernel = Core::System::GetInstance().Kernel();
finished_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic,
kernel, Kernel::ResetType::OneShot,
"IEnsureNetworkClockAvailabilityService:FinishEvent");
}

View File

@@ -129,7 +129,7 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"NVDRV::query_event");
}

View File

@@ -16,7 +16,7 @@ namespace Service::NVFlinger {
BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) {
auto& kernel = Core::System::GetInstance().Kernel();
buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"BufferQueue NativeHandle");
}

View File

@@ -2,15 +2,16 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <chrono>
#include "common/logging/log.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
#include "core/hle/service/set/set.h"
#include "core/settings.h"
namespace Service::Set {
namespace {
constexpr std::array<LanguageCode, 17> available_language_codes = {{
LanguageCode::JA,
LanguageCode::EN_US,
@@ -31,35 +32,41 @@ constexpr std::array<LanguageCode, 17> available_language_codes = {{
LanguageCode::ZH_HANT,
}};
constexpr std::size_t pre4_0_0_max_entries = 15;
constexpr std::size_t post4_0_0_max_entries = 17;
constexpr std::size_t pre4_0_0_max_entries = 0xF;
constexpr std::size_t post4_0_0_max_entries = 0x40;
constexpr ResultCode ERR_INVALID_LANGUAGE{ErrorModule::Settings, 625};
/// Replies with a success result followed by the number of language codes, narrowed to u32.
void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t num_language_codes) {
    const auto code_count = static_cast<u32>(num_language_codes);

    IPC::ResponseBuilder response{ctx, 3};
    response.Push(RESULT_SUCCESS);
    response.Push(code_count);
}
/// Copies language codes from the start of available_language_codes into the request's
/// write buffer and replies with the number of entries copied.
///
/// @param ctx      Request context that supplies the write buffer and receives the response.
/// @param max_size Upper bound on the number of entries to return.
void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t max_size) {
    // Number of whole entries that fit into the caller's buffer.
    const std::size_t requested_amount = ctx.GetWriteBufferSize() / sizeof(LanguageCode);
    // Bound the copy by the table itself as well, so a max_size larger than the table can
    // never cause a read past its end.
    const std::size_t copy_amount =
        std::min({requested_amount, max_size, available_language_codes.size()});
    const std::size_t copy_size = copy_amount * sizeof(LanguageCode);
    ctx.WriteBuffer(available_language_codes.data(), copy_size);
    PushResponseLanguageCode(ctx, copy_amount);
}
} // Anonymous namespace
/// Returns the entry of available_language_codes at `index`; the lookup is bounds-checked
/// by std::array::at.
LanguageCode GetLanguageCodeFromIndex(std::size_t index) {
    const LanguageCode& selected_code = available_language_codes.at(index);
    return selected_code;
}
/// Builds an array holding the first `size` entries of available_language_codes.
///
/// When `size` is larger than the table, only the entries that exist are copied and the
/// remaining trailing elements stay value-initialized.
template <std::size_t size>
static std::array<LanguageCode, size> MakeLanguageCodeSubset() {
    // Copying `size` elements unconditionally would read past the end of the table for any
    // instantiation larger than it, so bound the copy by the table length.
    constexpr std::size_t copy_count = std::min(size, available_language_codes.size());
    std::array<LanguageCode, size> arr{};
    std::copy_n(available_language_codes.begin(), copy_count, arr.begin());
    return arr;
}
/// Replies with a success result followed by the number of available language codes,
/// capped at `max_size`.
static void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t max_size) {
    // Report whichever is smaller: the table length or the caller-imposed limit.
    const std::size_t reported_count = std::min(available_language_codes.size(), max_size);

    IPC::ResponseBuilder rb{ctx, 3};
    rb.Push(RESULT_SUCCESS);
    rb.Push(static_cast<u32>(reported_count));
}
void SET::GetAvailableLanguageCodes(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_SET, "called");
GetAvailableLanguageCodesImpl(ctx, pre4_0_0_max_entries);
if (available_language_codes.size() > pre4_0_0_max_entries) {
ctx.WriteBuffer(MakeLanguageCodeSubset<pre4_0_0_max_entries>());
} else {
ctx.WriteBuffer(available_language_codes);
}
PushResponseLanguageCode(ctx, pre4_0_0_max_entries);
}
void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
@@ -80,7 +87,12 @@ void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
void SET::GetAvailableLanguageCodes2(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_SET, "called");
GetAvailableLanguageCodesImpl(ctx, post4_0_0_max_entries);
if (available_language_codes.size() > post4_0_0_max_entries) {
ctx.WriteBuffer(MakeLanguageCodeSubset<post4_0_0_max_entries>());
} else {
ctx.WriteBuffer(available_language_codes);
}
PushResponseLanguageCode(ctx, post4_0_0_max_entries);
}
void SET::GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx) {
@@ -90,9 +102,9 @@ void SET::GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx) {
}
void SET::GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_SET, "called");
PushResponseLanguageCode(ctx, post4_0_0_max_entries);
LOG_DEBUG(Service_SET, "called");
}
void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) {

View File

@@ -17,7 +17,7 @@ namespace Service::VI {
Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
auto& kernel = Core::System::GetInstance().Kernel();
vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
fmt::format("Display VSync Event {}", id));
}

View File

@@ -556,7 +556,7 @@ private:
} else {
// Wait the current thread until a buffer becomes available
ctx.SleepClientThread(
"IHOSBinderDriver::DequeueBuffer", -1,
Kernel::GetCurrentThread(), "IHOSBinderDriver::DequeueBuffer", -1,
[=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
Kernel::ThreadWakeupReason reason) {
// Repeat TransactParcel DequeueBuffer when a buffer is available

View File

@@ -89,6 +89,7 @@ add_library(video_core STATIC
shader/decode/conversion.cpp
shader/decode/memory.cpp
shader/decode/texture.cpp
shader/decode/image.cpp
shader/decode/float_set_predicate.cpp
shader/decode/integer_set_predicate.cpp
shader/decode/half_set_predicate.cpp

View File

@@ -40,13 +40,6 @@ bool DmaPusher::Step() {
}
const CommandList& command_list{dma_pushbuffer.front()};
ASSERT_OR_EXECUTE(!command_list.empty(), {
// Somehow the command_list is empty, in order to avoid a crash
// We ignore it and assume its size is 0.
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
return true;
});
const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
GPUVAddr dma_get = command_list_header.addr;
GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);

View File

@@ -2,8 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include "common/assert.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/memory_manager.h"
@@ -12,9 +10,7 @@
namespace Tegra::Engines::Upload {
State::State(MemoryManager& memory_manager, Registers& regs)
: regs{regs}, memory_manager{memory_manager} {}
State::~State() = default;
: memory_manager(memory_manager), regs(regs) {}
void State::ProcessExec(const bool is_linear) {
write_offset = 0;

View File

@@ -4,8 +4,10 @@
#pragma once
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra {
@@ -55,10 +57,10 @@ struct Registers {
class State {
public:
State(MemoryManager& memory_manager, Registers& regs);
~State();
~State() = default;
void ProcessExec(bool is_linear);
void ProcessData(u32 data, bool is_last_call);
void ProcessExec(const bool is_linear);
void ProcessData(const u32 data, const bool is_last_call);
private:
u32 write_offset = 0;

View File

@@ -34,9 +34,9 @@ void Maxwell3D::InitializeRegisterDefaults() {
// Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is
// needed for ARMS.
for (auto& viewport : regs.viewports) {
viewport.depth_range_near = 0.0f;
viewport.depth_range_far = 1.0f;
for (std::size_t viewport{}; viewport < Regs::NumViewports; ++viewport) {
regs.viewports[viewport].depth_range_near = 0.0f;
regs.viewports[viewport].depth_range_far = 1.0f;
}
// Doom and Bomberman seem to use the uninitialized registers and just enable blend
@@ -47,13 +47,13 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.blend.equation_a = Regs::Blend::Equation::Add;
regs.blend.factor_source_a = Regs::Blend::Factor::One;
regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
for (auto& blend : regs.independent_blend) {
blend.equation_rgb = Regs::Blend::Equation::Add;
blend.factor_source_rgb = Regs::Blend::Factor::One;
blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
blend.equation_a = Regs::Blend::Equation::Add;
blend.factor_source_a = Regs::Blend::Factor::One;
blend.factor_dest_a = Regs::Blend::Factor::Zero;
for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) {
regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add;
regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One;
regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero;
regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add;
regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One;
regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero;
}
regs.stencil_front_op_fail = Regs::StencilOp::Keep;
regs.stencil_front_op_zfail = Regs::StencilOp::Keep;
@@ -75,11 +75,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
// TODO(bunnei): Some games do not initialize the color masks (e.g. Sonic Mania). Assuming a
// default of enabled fixes rendering here.
for (auto& color_mask : regs.color_mask) {
color_mask.R.Assign(1);
color_mask.G.Assign(1);
color_mask.B.Assign(1);
color_mask.A.Assign(1);
for (std::size_t color_mask = 0; color_mask < Regs::NumRenderTargets; color_mask++) {
regs.color_mask[color_mask].R.Assign(1);
regs.color_mask[color_mask].G.Assign(1);
regs.color_mask[color_mask].B.Assign(1);
regs.color_mask[color_mask].A.Assign(1);
}
// Commercial games seem to assume this value is enabled and nouveau sets this value manually.
@@ -178,13 +178,13 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
// Vertex buffer
if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
} else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
} else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
}
}
@@ -432,17 +432,13 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
"TIC versions other than BlockLinear or Pitch are unimplemented");
const auto r_type = tic_entry.r_type.Value();
const auto g_type = tic_entry.g_type.Value();
const auto b_type = tic_entry.b_type.Value();
const auto a_type = tic_entry.a_type.Value();
const auto r_type{tic_entry.r_type.Value()};
const auto g_type{tic_entry.g_type.Value()};
const auto b_type{tic_entry.b_type.Value()};
const auto a_type{tic_entry.a_type.Value()};
// TODO(Subv): Different data types for separate components are not supported
DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
return tic_entry;
}

View File

@@ -6,7 +6,6 @@
#include <array>
#include <bitset>
#include <type_traits>
#include <unordered_map>
#include <vector>
@@ -59,7 +58,6 @@ public:
static constexpr std::size_t NumCBData = 16;
static constexpr std::size_t NumVertexArrays = 32;
static constexpr std::size_t NumVertexAttributes = 32;
static constexpr std::size_t NumVaryings = 31;
static constexpr std::size_t NumTextureSamplers = 32;
static constexpr std::size_t NumClipDistances = 8;
static constexpr std::size_t MaxShaderProgram = 6;
@@ -1109,7 +1107,6 @@ public:
} regs{};
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
struct State {
struct ConstBufferInfo {

View File

@@ -98,10 +98,6 @@ union Attribute {
BitField<22, 2, u64> element;
BitField<24, 6, Index> index;
BitField<47, 3, AttributeSize> size;
/// True when both the element field and the index field of this encoding are zero.
bool IsPhysical() const {
    const bool element_is_zero = element == 0;
    const bool index_is_zero = static_cast<u64>(index.Value()) == 0;
    return element_is_zero && index_is_zero;
}
} fmt20;
union {
@@ -126,6 +122,15 @@ union Sampler {
u64 value{};
};
// View over a 64-bit word that exposes the image index encoded in it.
union Image {
// NOTE(review): `value` has no default member initializer, so a default-constructed Image
// appears to be left uninitialized — confirm callers always assign it.
Image() = default;
// Wraps an already-encoded raw 64-bit value.
constexpr explicit Image(u64 value) : value{value} {}
// Image index; presumably 13 bits starting at bit 36 — confirm against BitField's
// template parameter order.
BitField<36, 13, u64> index;
// Raw 64-bit value aliased by the bit field above.
u64 value;
};
} // namespace Tegra::Shader
namespace std {
@@ -344,6 +349,26 @@ enum class TextureMiscMode : u64 {
PTP,
};
// Values of the SUST `mode` field. The SUST accessors assert on it: IsComponentEnabled()
// expects P and GetStoreDataLayout() expects D_BA.
enum class SurfaceDataMode : u64 {
P = 0,
D_BA = 1,
};
// Values of the SUST `out_of_bounds_store` field.
enum class OutOfBoundsStore : u64 {
Ignore = 0,
Clamp = 1,
Trap = 2,
};
// Values of the SUST `image_type` field, naming the kind of image the instruction targets.
enum class ImageType : u64 {
Texture1D = 0,
TextureBuffer = 1,
Texture1DArray = 2,
Texture2D = 3,
Texture2DArray = 4,
Texture3D = 5,
};
enum class IsberdMode : u64 {
None = 0,
Patch = 1,
@@ -398,7 +423,7 @@ enum class LmemLoadCacheManagement : u64 {
CV = 3,
};
enum class LmemStoreCacheManagement : u64 {
enum class StoreCacheManagement : u64 {
Default = 0,
CG = 1,
CS = 2,
@@ -503,11 +528,6 @@ enum class SystemVariable : u64 {
CircularQueueEntryAddressHigh = 0x63,
};
// Values of the AL2P `direction` field (decoded by Instruction::al2p).
enum class PhysicalAttributeDirection : u64 {
Input = 0,
Output = 1,
};
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -529,11 +549,6 @@ union Instruction {
BitField<39, 8, Register> gpr39;
BitField<48, 16, u64> opcode;
union {
BitField<8, 8, Register> gpr;
BitField<20, 24, s64> offset;
} gmem;
union {
BitField<20, 16, u64> imm20_16;
BitField<20, 19, u64> imm20_19;
@@ -601,7 +616,6 @@ union Instruction {
} alu;
union {
BitField<38, 1, u64> idx;
BitField<51, 1, u64> saturate;
BitField<52, 2, IpaSampleMode> sample_mode;
BitField<54, 2, IpaInterpMode> interp_mode;
@@ -811,30 +825,21 @@ union Instruction {
} ld_l;
union {
BitField<44, 2, LmemStoreCacheManagement> cache_management;
BitField<44, 2, StoreCacheManagement> cache_management;
} st_l;
union {
BitField<48, 3, UniformType> type;
BitField<46, 2, u64> cache_mode;
BitField<20, 24, s64> immediate_offset;
} ldg;
union {
BitField<48, 3, UniformType> type;
BitField<46, 2, u64> cache_mode;
BitField<20, 24, s64> immediate_offset;
} stg;
union {
BitField<32, 1, PhysicalAttributeDirection> direction;
BitField<47, 3, AttributeSize> size;
BitField<20, 11, u64> address;
} al2p;
union {
BitField<53, 3, UniformType> type;
BitField<52, 1, u64> extended;
} generic;
union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
@@ -1231,6 +1236,20 @@ union Instruction {
}
} texs;
// Bit fields decoded for the TLD (texture load) instruction.
union {
BitField<28, 1, u64> is_array;
BitField<29, 2, TextureType> texture_type;
BitField<35, 1, u64> aoffi;
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> ms; // Multisample?
BitField<54, 1, u64> cl;
BitField<55, 1, u64> process_mode;
// Maps the single process_mode bit to a process mode: 0 selects LZ, anything else LL.
TextureProcessMode GetTextureProcessMode() const {
return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
}
} tld;
union {
BitField<49, 1, u64> nodep_flag;
BitField<53, 4, u64> texture_info;
@@ -1280,6 +1299,35 @@ union Instruction {
}
} tlds;
// Bit fields decoded for the SUST (surface store) instruction.
union {
BitField<24, 2, StoreCacheManagement> cache_management;
BitField<33, 3, ImageType> image_type;
BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
BitField<51, 1, u64> is_immediate;
BitField<52, 1, SurfaceDataMode> mode;
// The next two fields both start at bit 20 and therefore overlap; the accessors below
// assert on `mode` before reading either one.
BitField<20, 3, StoreType> store_data_layout;
BitField<20, 4, u64> component_mask_selector;
// Returns whether `component` (0 = R, 1 = G, 2 = B, 3 = A) is enabled by
// component_mask_selector. Asserts that mode is P. std::bitset::test throws
// std::out_of_range when `component` is 4 or greater.
bool IsComponentEnabled(std::size_t component) const {
ASSERT(mode == SurfaceDataMode::P);
constexpr u8 R = 0b0001;
constexpr u8 G = 0b0010;
constexpr u8 B = 0b0100;
constexpr u8 A = 0b1000;
// Each entry equals its own index: the table only spells out which components every
// selector value enables.
constexpr std::array<u8, 16> mask = {
0, (R), (G), (R | G), (B), (R | B),
(G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A),
(B | A), (R | B | A), (G | B | A), (R | G | B | A)};
return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
}
// Returns the store data layout. Asserts that mode is D_BA.
StoreType GetStoreDataLayout() const {
ASSERT(mode == SurfaceDataMode::D_BA);
return store_data_layout;
}
} sust;
union {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;
@@ -1371,6 +1419,7 @@ union Instruction {
Attribute attribute;
Sampler sampler;
Image image;
u64 value;
};
@@ -1395,24 +1444,23 @@ public:
LD_L,
LD_S,
LD_C,
LD, // Load from generic memory
LDG, // Load from global memory
ST_A,
ST_L,
ST_S,
ST, // Store in generic memory
STG, // Store in global memory
AL2P, // Transforms attribute memory into physical memory
LDG, // Load from global memory
STG, // Store in global memory
TEX,
TEX_B, // Texture Load Bindless
TXQ, // Texture Query
TXQ_B, // Texture Query Bindless
TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
TLD, // Texture Load
TLDS, // Texture Load with scalar/non-vec4 source/destinations
TLD4, // Texture Load 4
TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
TMML_B, // Texture Mip Map Level
TMML, // Texture Mip Map Level
SUST, // Surface Store
EXIT,
IPA,
OUT_R, // Emit vertex/primitive
@@ -1543,6 +1591,7 @@ public:
Synch,
Memory,
Texture,
Image,
FloatSet,
FloatSetPredicate,
IntegerSet,
@@ -1668,24 +1717,23 @@ private:
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
INST("100-------------", Id::LD, Type::Memory, "LD"),
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("101-------------", Id::ST, Type::Memory, "ST"),
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),

View File

@@ -118,7 +118,7 @@ void SynchState::WaitForSynchronization(u64 fence) {
// Wait for the GPU to be idle (all commands to be executed)
{
MICROPROFILE_SCOPE(GPU_wait);
std::unique_lock lock{synchronization_mutex};
std::unique_lock<std::mutex> lock{synchronization_mutex};
synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
}
}

View File

@@ -81,6 +81,12 @@ struct CommandDataContainer {
CommandDataContainer(CommandData&& data, u64 next_fence)
: data{std::move(data)}, fence{next_fence} {}
/// Copy-assigns the command payload and its fence value from `t`.
CommandDataContainer& operator=(const CommandDataContainer& t) {
    // `t` is const, so std::move(t.data) could only ever select the copy assignment;
    // copy explicitly so the code states what actually happens.
    data = t.data;
    fence = t.fence;
    return *this;
}
CommandData data;
u64 fence{};
};
@@ -103,7 +109,7 @@ struct SynchState final {
void TrySynchronize() {
if (IsSynchronized()) {
std::lock_guard lock{synchronization_mutex};
std::lock_guard<std::mutex> lock{synchronization_mutex};
synchronization_condition.notify_one();
}
}

View File

@@ -118,12 +118,10 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
static_cast<u32>(opcode.operation.Value()));
}
// An instruction with the Exit flag will not actually
// cause an exit if it's executed inside a delay slot.
// TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further
// testing on the MME code.
if (opcode.is_exit) {
// Exit has a delay slot, execute the next instruction
// Note: Executing an exit during a branch delay slot will cause the instruction at the
// branch target to be executed before exiting.
Step(offset, true);
return false;
}

View File

@@ -144,9 +144,8 @@ protected:
object->SetIsRegistered(false);
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
const CacheAddr addr = object->GetCacheAddr();
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
map_cache.erase(addr);
map_cache.erase(object->GetCacheAddr());
}
/// Returns a ticks counter used for tracking when cached objects were last modified

View File

@@ -21,15 +21,11 @@ T GetInteger(GLenum pname) {
Device::Device() {
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
has_variable_aoffi = TestVariableAoffi();
}
Device::Device(std::nullptr_t) {
uniform_buffer_alignment = 0;
max_vertex_attributes = 16;
max_varyings = 15;
has_variable_aoffi = true;
}

View File

@@ -5,7 +5,6 @@
#pragma once
#include <cstddef>
#include "common/common_types.h"
namespace OpenGL {
@@ -18,14 +17,6 @@ public:
return uniform_buffer_alignment;
}
// Returns the cached max_vertex_attributes value.
u32 GetMaxVertexAttributes() const {
return max_vertex_attributes;
}
// Returns the cached max_varyings value.
u32 GetMaxVaryings() const {
return max_varyings;
}
// Returns the cached has_variable_aoffi flag.
bool HasVariableAoffi() const {
return has_variable_aoffi;
}
@@ -34,8 +25,6 @@ private:
static bool TestVariableAoffi();
std::size_t uniform_buffer_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
bool has_variable_aoffi{};
};

View File

@@ -29,8 +29,10 @@
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = VideoCore::Surface::PixelFormat;
using SurfaceType = VideoCore::Surface::SurfaceType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
@@ -119,11 +121,6 @@ void RasterizerOpenGL::CheckExtensions() {
Render_OpenGL,
"Anisotropic filter is not supported! This can cause graphical issues in some games.");
}
if (!GLAD_GL_ARB_buffer_storage) {
LOG_WARNING(
Render_OpenGL,
"Buffer storage control is not supported! This can cause performance degradation.");
}
}
GLuint RasterizerOpenGL::SetupVertexFormat() {
@@ -261,8 +258,8 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
// MakeQuadArray always generates u32 indexes
params.index_format = GL_UNSIGNED_INT;
params.count = (regs.vertex_buffer.count / 4) * 6;
params.index_buffer_offset = primitive_assembler.MakeQuadArray(
regs.vertex_buffer.first, regs.vertex_buffer.count);
params.index_buffer_offset =
primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count);
}
return params;
}
@@ -323,8 +320,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
static_cast<GLsizeiptr>(sizeof(ubo)));
Shader shader{shader_cache.GetStageProgram(program)};
const auto [program_handle, next_bindings] =
shader->GetProgramHandle(primitive_mode, base_bindings);
const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)};
SetupConstBuffers(stage_enum, shader, base_bindings);
SetupGlobalRegions(stage_enum, shader, base_bindings);
const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
switch (program) {
case Maxwell::ShaderProgram::VertexA:
@@ -342,11 +345,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
shader_config.enable.Value(), shader_config.offset);
}
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
SetupTextures(stage_enum, shader, program_handle, base_bindings);
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -809,8 +807,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
}
void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLuint program_handle,
BaseBindings base_bindings) {
const Shader& shader, BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
@@ -857,8 +854,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
}
void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLenum primitive_mode,
BaseBindings base_bindings) {
const Shader& shader, BaseBindings base_bindings) {
const auto& entries = shader->GetShaderEntries().global_memory_entries;
for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry{entries[bindpoint]};
@@ -871,8 +867,8 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
}
}
void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings) {
TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
@@ -881,6 +877,8 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
"Exceeded the number of active textures.");
TextureBufferUsage texture_buffer_usage{0};
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
Tegra::Texture::FullTextureInfo texture;
@@ -894,18 +892,25 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
}
const u32 current_bindpoint = base_bindings.sampler + bindpoint;
state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc);
auto& unit{state.texture_units[current_bindpoint]};
unit.sampler = sampler_cache.GetSampler(texture.tsc);
if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
state.texture_units[current_bindpoint].texture =
surface->Texture(entry.IsArray()).handle;
if (surface->GetSurfaceParams().target == SurfaceTarget::TextureBuffer) {
// Record that this texture is a texture buffer.
texture_buffer_usage.set(bindpoint);
}
unit.texture = surface->Texture(entry.IsArray()).handle;
surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
texture.tic.w_source);
} else {
// Can occur when texture addr is null or its memory is unmapped/invalid
state.texture_units[current_bindpoint].texture = 0;
unit.texture = 0;
}
}
return texture_buffer_usage;
}
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
@@ -1135,9 +1140,7 @@ void RasterizerOpenGL::SyncTransformFeedback() {
void RasterizerOpenGL::SyncPointState() {
const auto& regs = system.GPU().Maxwell3D().regs;
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL).
state.point.size = std::max(1.0f, regs.point_size);
state.point.size = regs.point_size;
}
void RasterizerOpenGL::SyncPolygonOffset() {

View File

@@ -106,16 +106,16 @@ private:
/// Configures the current constbuffers to use for the draw command.
void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings);
BaseBindings base_bindings);
/// Configures the current global memory entries to use for the draw command.
void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLenum primitive_mode,
BaseBindings base_bindings);
const Shader& shader, BaseBindings base_bindings);
/// Configures the current textures to use for the draw command.
void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings);
/// Configures the current textures to use for the draw command. Returns shaders texture buffer
/// usage.
TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, BaseBindings base_bindings);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state);

View File

@@ -140,7 +140,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
if (!params.is_tiled) {
if (config.tic.IsLineal()) {
params.pitch = config.tic.Pitch();
}
params.unaligned_height = config.tic.Height();
@@ -149,6 +149,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
switch (params.target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::TextureBuffer:
case SurfaceTarget::Texture2D:
params.depth = 1;
break;
@@ -389,6 +390,8 @@ static GLenum SurfaceTargetToGL(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
return GL_TEXTURE_1D;
case SurfaceTarget::TextureBuffer:
return GL_TEXTURE_BUFFER;
case SurfaceTarget::Texture2D:
return GL_TEXTURE_2D;
case SurfaceTarget::Texture3D:
@@ -600,29 +603,35 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
switch (params.target) {
case SurfaceTarget::Texture1D:
glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width);
glTextureStorage1D(texture.handle, params.max_mip_level, gl_internal_format, width);
break;
case SurfaceTarget::TextureBuffer:
texture_buffer.Create();
glNamedBufferStorage(texture_buffer.handle,
params.width * GetBytesPerPixel(params.pixel_format), nullptr,
GL_DYNAMIC_STORAGE_BIT);
glTextureBuffer(texture.handle, gl_internal_format, texture_buffer.handle);
break;
case SurfaceTarget::Texture2D:
case SurfaceTarget::TextureCubemap:
glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height);
glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height);
break;
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height, params.depth);
glTextureStorage3D(texture.handle, params.max_mip_level, gl_internal_format, width, height,
params.depth);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height);
glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height);
}
ApplyTextureDefaults(texture.handle, params.max_mip_level);
if (params.target != SurfaceTarget::TextureBuffer) {
ApplyTextureDefaults(texture.handle, params.max_mip_level);
}
OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
}
@@ -785,6 +794,13 @@ void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_t
glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::TextureBuffer:
    // Texture buffers have a single level backed by a buffer object.
    ASSERT(mip_map == 0);
    // glNamedBufferSubData expects both the offset and the size in bytes, while x0 and the
    // rectangle width are expressed in texels; scale both by the texel size.
    glNamedBufferSubData(texture_buffer.handle,
                         static_cast<GLintptr>(x0) * GetBytesPerPixel(params.pixel_format),
                         static_cast<GLsizeiptr>(rect.GetWidth()) *
                             GetBytesPerPixel(params.pixel_format),
                         &gl_buffer[mip_map][buffer_offset]);
    break;
case SurfaceTarget::Texture2D:
glTextureSubImage2D(texture.handle, mip_map, x0, y0,
static_cast<GLsizei>(rect.GetWidth()),
@@ -860,6 +876,9 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
Tegra::Texture::SwizzleSource swizzle_y,
Tegra::Texture::SwizzleSource swizzle_z,
Tegra::Texture::SwizzleSource swizzle_w) {
if (params.target == SurfaceTarget::TextureBuffer) {
return;
}
const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x);
const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y);
const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z);

View File

@@ -250,6 +250,8 @@ struct SurfaceParams {
switch (target) {
case SurfaceTarget::Texture1D:
return "1D";
case SurfaceTarget::TextureBuffer:
return "Buffer";
case SurfaceTarget::Texture2D:
return "2D";
case SurfaceTarget::Texture3D:
@@ -439,6 +441,7 @@ private:
OGLTexture texture;
OGLTexture discrepant_view;
OGLBuffer texture_buffer;
SurfaceParams params{};
GLenum gl_target{};
GLenum gl_internal_format{};

View File

@@ -164,8 +164,12 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr
}
CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
GLenum primitive_mode, bool hint_retrievable = false) {
Maxwell::ShaderProgram program_type, const ProgramVariant& variant,
bool hint_retrievable = false) {
auto base_bindings{variant.base_bindings};
const auto primitive_mode{variant.primitive_mode};
const auto texture_buffer_usage{variant.texture_buffer_usage};
std::string source = "#version 430 core\n";
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
@@ -181,6 +185,18 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
base_bindings.sampler++);
}
for (const auto& image : entries.images) {
source +=
fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++);
}
// Transform 1D textures to texture samplers by declaring its preprocessor macros.
// Each directive must end with a newline: a preprocessor directive extends to the end of the
// line, so without it consecutive defines (and the source appended afterwards) would be
// swallowed into a single #define.
for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) {
    if (!texture_buffer_usage.test(i)) {
        continue;
    }
    source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i);
}
if (program_type == Maxwell::ShaderProgram::Geometry) {
const auto [glsl_topology, debug_name, max_vertices] =
@@ -256,20 +272,18 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
shader_length = entries.shader_length;
}
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings) {
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
GLuint handle{};
if (program_type == Maxwell::ShaderProgram::Geometry) {
handle = GetGeometryShader(primitive_mode, base_bindings);
handle = GetGeometryShader(variant);
} else {
const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
const auto [entry, is_cache_miss] = programs.try_emplace(variant);
auto& program = entry->second;
if (is_cache_miss) {
program = TryLoadProgram(primitive_mode, base_bindings);
program = TryLoadProgram(variant);
if (!program) {
program =
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
program = SpecializeShader(code, entries, program_type, variant);
disk_cache.SaveUsage(GetUsage(variant));
}
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
@@ -278,6 +292,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
handle = program->handle;
}
auto base_bindings{variant.base_bindings};
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
@@ -285,43 +300,42 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
return {handle, base_bindings};
}
GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
GLuint CachedShader::GetGeometryShader(const ProgramVariant& variant) {
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(variant);
auto& programs = entry->second;
switch (primitive_mode) {
switch (variant.primitive_mode) {
case GL_POINTS:
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.points, variant);
case GL_LINES:
case GL_LINE_STRIP:
return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.lines, variant);
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.lines_adjacency, variant);
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.triangles, variant);
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.triangles_adjacency, variant);
default:
UNREACHABLE_MSG("Unknown primitive mode.");
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.points, variant);
}
}
GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
GLenum primitive_mode) {
GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program,
const ProgramVariant& variant) {
if (target_program) {
return target_program->handle;
}
const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
target_program = TryLoadProgram(primitive_mode, base_bindings);
const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(variant.primitive_mode);
target_program = TryLoadProgram(variant);
if (!target_program) {
target_program =
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
target_program = SpecializeShader(code, entries, program_type, variant);
disk_cache.SaveUsage(GetUsage(variant));
}
LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
@@ -329,18 +343,19 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
return target_program->handle;
};
CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
BaseBindings base_bindings) const {
const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
/// Looks up an already built program for the given variant among the precompiled programs.
/// Returns an empty CachedProgram when no entry exists for the variant's usage.
CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
    if (const auto it = precompiled_programs.find(GetUsage(variant));
        it != precompiled_programs.end()) {
        return it->second;
    }
    return {};
}
ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
BaseBindings base_bindings) const {
return {unique_identifier, base_bindings, primitive_mode};
/// Builds the usage record that pairs this shader's unique identifier with the given variant.
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const {
    ShaderDiskCacheUsage result{};
    result.variant = variant;
    result.unique_identifier = unique_identifier;
    return result;
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -394,7 +409,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
if (!shader) {
shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
usage.bindings, usage.primitive, true);
usage.variant, true);
}
precompiled_programs.insert({usage, std::move(shader)});

View File

@@ -6,6 +6,7 @@
#include <array>
#include <atomic>
#include <bitset>
#include <memory>
#include <set>
#include <tuple>
@@ -22,7 +23,7 @@
namespace Core {
class System;
} // namespace Core
}
namespace OpenGL {
@@ -63,8 +64,7 @@ public:
}
/// Gets the GL program handle for the shader
std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings);
std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
private:
// Geometry programs. These are needed because GLSL needs an input topology but it's not
@@ -78,15 +78,14 @@ private:
CachedProgram triangles_adjacency;
};
GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
GLuint GetGeometryShader(const ProgramVariant& variant);
/// Generates a geometry shader or returns one that already exists.
GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
GLenum primitive_mode);
GLuint LazyGeometryProgram(CachedProgram& target_program, const ProgramVariant& variant);
CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
CachedProgram TryLoadProgram(const ProgramVariant& variant) const;
ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
u8* host_ptr{};
VAddr cpu_addr{};
@@ -100,8 +99,8 @@ private:
std::string code;
std::unordered_map<BaseBindings, CachedProgram> programs;
std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
std::unordered_map<ProgramVariant, CachedProgram> programs;
std::unordered_map<ProgramVariant, GeometryPrograms> geometry_programs;
std::unordered_map<u32, GLuint> cbuf_resource_cache;
std::unordered_map<u32, GLuint> gmem_resource_cache;

File diff suppressed because it is too large Load Diff

View File

@@ -27,6 +27,7 @@ struct ShaderEntries;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ProgramResult = std::pair<std::string, ShaderEntries>;
using SamplerEntry = VideoCommon::Shader::Sampler;
using ImageEntry = VideoCommon::Shader::Image;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
@@ -74,6 +75,7 @@ struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
std::vector<SamplerEntry> samplers;
std::vector<SamplerEntry> bindless_samplers;
std::vector<ImageEntry> images;
std::vector<GlobalMemoryEntry> global_memory_entries;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};

View File

@@ -34,11 +34,11 @@ enum class PrecompiledEntryKind : u32 {
Dump,
};
constexpr u32 NativeVersion = 1;
constexpr u32 NativeVersion = 3;
// Making sure sizes doesn't change by accident
static_assert(sizeof(BaseBindings) == 12);
static_assert(sizeof(ShaderDiskCacheUsage) == 24);
static_assert(sizeof(BaseBindings) == 16);
static_assert(sizeof(ShaderDiskCacheUsage) == 40);
namespace {
@@ -104,9 +104,8 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
return true;
}
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
/// Stores the system reference and starts the precompiled cache virtual file offset at zero.
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
: system{system}, precompiled_cache_virtual_file_offset{0} {}
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
ShaderDiskCacheOpenGL::LoadTransferable() {
@@ -244,7 +243,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
auto entry = LoadDecompiledEntry();
const auto entry = LoadDecompiledEntry();
if (!entry) {
return {};
}
@@ -287,82 +286,97 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
if (!LoadObjectFromPrecompiled(code_size)) {
return {};
}
std::string code(code_size, '\0');
std::vector<u8> code(code_size);
if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
return {};
}
ShaderDiskCacheDecompiled entry;
entry.code = std::move(code);
entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
u32 const_buffers_count{};
if (!LoadObjectFromPrecompiled(const_buffers_count)) {
return {};
}
for (u32 i = 0; i < const_buffers_count; ++i) {
u32 max_offset{};
u32 index{};
bool is_indirect{};
u8 is_indirect{};
if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(is_indirect)) {
return {};
}
entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index);
entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
}
u32 samplers_count{};
if (!LoadObjectFromPrecompiled(samplers_count)) {
return {};
}
for (u32 i = 0; i < samplers_count; ++i) {
u64 offset{};
u64 index{};
u32 type{};
bool is_array{};
bool is_shadow{};
bool is_bindless{};
u8 is_array{};
u8 is_shadow{};
u8 is_bindless{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
!LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
return {};
}
entry.entries.samplers.emplace_back(
entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
static_cast<std::size_t>(index),
static_cast<Tegra::Shader::TextureType>(type),
is_array != 0, is_shadow != 0, is_bindless != 0);
}
u32 images_count{};
if (!LoadObjectFromPrecompiled(images_count)) {
return {};
}
for (u32 i = 0; i < images_count; ++i) {
u64 offset{};
u64 index{};
u32 type{};
u8 is_bindless{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) {
return {};
}
entry.entries.images.emplace_back(
static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0);
}
u32 global_memory_count{};
if (!LoadObjectFromPrecompiled(global_memory_count)) {
return {};
}
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
bool is_read{};
bool is_written{};
u8 is_read{};
u8 is_written{};
if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
!LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
return {};
}
entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
is_written);
entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
is_written != 0);
}
for (auto& clip_distance : entry.entries.clip_distances) {
if (!LoadObjectFromPrecompiled(clip_distance)) {
u8 clip_distance_raw{};
if (!LoadObjectFromPrecompiled(clip_distance_raw))
return {};
}
clip_distance = clip_distance_raw != 0;
}
u64 shader_length{};
if (!LoadObjectFromPrecompiled(shader_length)) {
return {};
}
entry.entries.shader_length = static_cast<std::size_t>(shader_length);
return entry;
@@ -383,7 +397,7 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
for (const auto& cbuf : entries.const_buffers) {
if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
!SaveObjectToPrecompiled(cbuf.IsIndirect())) {
!SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
return false;
}
}
@@ -395,9 +409,21 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
!SaveObjectToPrecompiled(sampler.IsArray()) ||
!SaveObjectToPrecompiled(sampler.IsShadow()) ||
!SaveObjectToPrecompiled(sampler.IsBindless())) {
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
return false;
}
}
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
return false;
}
for (const auto& image : entries.images) {
if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
!SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0))) {
return false;
}
}
@@ -408,13 +434,14 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
for (const auto& gmem : entries.global_memory_entries) {
if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
!SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
!SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
!SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
return false;
}
}
for (const bool clip_distance : entries.clip_distances) {
if (!SaveObjectToPrecompiled(clip_distance)) {
if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
return false;
}
}

View File

@@ -33,14 +33,18 @@ namespace OpenGL {
using ProgramCode = std::vector<u64>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
/// Allocated bindings used by an OpenGL shader program
using TextureBufferUsage = std::bitset<64>;
/// Allocated bindings used by an OpenGL shader program.
struct BaseBindings {
u32 cbuf{};
u32 gmem{};
u32 sampler{};
u32 image{};
bool operator==(const BaseBindings& rhs) const {
return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
return std::tie(cbuf, gmem, sampler, image) ==
std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image);
}
bool operator!=(const BaseBindings& rhs) const {
@@ -48,15 +52,29 @@ struct BaseBindings {
}
};
/// Describes how a shader is used
/// Describes the different variants a single program can be compiled as.
struct ProgramVariant {
// Base binding indices (see BaseBindings) the variant is built with.
BaseBindings base_bindings;
// OpenGL primitive mode the variant is specialized for.
GLenum primitive_mode{};
// One bit per sampler bindpoint; a set bit marks that sampler as a texture buffer.
TextureBufferUsage texture_buffer_usage{};
/// Two variants are equal when all three fields compare equal.
bool operator==(const ProgramVariant& rhs) const {
return std::tie(base_bindings, primitive_mode, texture_buffer_usage) ==
std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage);
}
/// Negation of operator==.
bool operator!=(const ProgramVariant& rhs) const {
return !operator==(rhs);
}
};
/// Describes how a shader is used.
struct ShaderDiskCacheUsage {
u64 unique_identifier{};
BaseBindings bindings;
GLenum primitive{};
ProgramVariant variant;
bool operator==(const ShaderDiskCacheUsage& rhs) const {
return std::tie(unique_identifier, bindings, primitive) ==
std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive);
return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant);
}
bool operator!=(const ShaderDiskCacheUsage& rhs) const {
@@ -70,16 +88,28 @@ namespace std {
template <>
struct hash<OpenGL::BaseBindings> {
std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
return static_cast<std::size_t>(bindings.cbuf) ^
(static_cast<std::size_t>(bindings.gmem) << 8) ^
(static_cast<std::size_t>(bindings.sampler) << 16) ^
(static_cast<std::size_t>(bindings.image) << 24);
}
};
/// Hash support for OpenGL::ProgramVariant so it can be used as an unordered container key.
template <>
struct hash<OpenGL::ProgramVariant> {
    std::size_t operator()(const OpenGL::ProgramVariant& variant) const {
        // Hash each field independently, then fold the results together with XOR.
        const std::size_t bindings_hash = std::hash<OpenGL::BaseBindings>()(variant.base_bindings);
        const std::size_t usage_hash =
            std::hash<OpenGL::TextureBufferUsage>()(variant.texture_buffer_usage);
        const std::size_t primitive_bits = static_cast<std::size_t>(variant.primitive_mode) << 6;
        return bindings_hash ^ usage_hash ^ primitive_bits;
    }
};
template <>
struct hash<OpenGL::ShaderDiskCacheUsage> {
std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
return static_cast<std::size_t>(usage.unique_identifier) ^
std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
std::hash<OpenGL::ProgramVariant>()(usage.variant);
}
};
@@ -162,7 +192,6 @@ struct ShaderDiskCacheDump {
class ShaderDiskCacheOpenGL {
public:
explicit ShaderDiskCacheOpenGL(Core::System& system);
~ShaderDiskCacheOpenGL();
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
@@ -260,35 +289,21 @@ private:
return SaveArrayToPrecompiled(&object, 1);
}
bool SaveObjectToPrecompiled(bool object) {
const auto value = static_cast<u8>(object);
return SaveArrayToPrecompiled(&value, 1);
}
template <typename T>
bool LoadObjectFromPrecompiled(T& object) {
return LoadArrayFromPrecompiled(&object, 1);
}
bool LoadObjectFromPrecompiled(bool& object) {
u8 value;
const bool read_ok = LoadArrayFromPrecompiled(&value, 1);
if (!read_ok) {
return false;
}
object = value != 0;
return true;
}
// Core system
Core::System& system;
// Stored transferable shaders
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
// Stores whole precompiled cache which will be read from/saved to the precompiled cache file
// Stores whole precompiled cache which will be read from or saved to the precompiled cache
// file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
// Stores the current offset of the precompiled cache file for IO purposes
std::size_t precompiled_cache_virtual_file_offset = 0;
std::size_t precompiled_cache_virtual_file_offset;
// Stored transferable shaders
std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
// The cache has been loaded at boot
bool tried_to_load{};

View File

@@ -33,14 +33,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
};
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
out += program.first;
if (setup.IsDualProgram()) {
const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
ProgramResult program_b =
Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
@@ -76,7 +76,7 @@ void main() {
}
})";
return {std::move(out), std::move(program.second)};
return {out, program.second};
}
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
@@ -97,7 +97,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
};
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
out += program.first;
@@ -107,7 +107,7 @@ void main() {
execute_geometry();
};)";
return {std::move(out), std::move(program.second)};
return {out, program.second};
}
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
@@ -160,7 +160,7 @@ bool AlphaFunc(in float value) {
}
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
@@ -172,7 +172,7 @@ void main() {
}
)";
return {std::move(out), std::move(program.second)};
return {out, program.second};
}
} // namespace OpenGL::GLShader

View File

@@ -15,7 +15,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent)
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent,
bool use_persistent)
: buffer_size(size) {
gl_buffer.Create();
@@ -29,7 +30,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
allocate_size *= 2;
}
if (GLAD_GL_ARB_buffer_storage) {
if (use_persistent) {
persistent = true;
coherent = prefer_coherent;
const GLbitfield flags =

View File

@@ -13,7 +13,8 @@ namespace OpenGL {
class OGLStreamBuffer : private NonCopyable {
public:
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false);
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false,
bool use_persistent = true);
~OGLStreamBuffer();
GLuint GetHandle() const;

View File

@@ -126,8 +126,6 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_TRIANGLES;
case Maxwell::PrimitiveTopology::TriangleStrip:
return GL_TRIANGLE_STRIP;
case Maxwell::PrimitiveTopology::TriangleFan:
return GL_TRIANGLE_FAN;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
UNREACHABLE();

View File

@@ -472,7 +472,6 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
}
}
/// Initialize the renderer
bool RendererOpenGL::Init() {
Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};

View File

@@ -194,8 +194,8 @@ public:
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler);
}
for (const auto& attribute : ir.GetInputAttributes()) {
entries.attributes.insert(GetGenericAttributeLocation(attribute));
for (const auto& attr : ir.GetInputAttributes()) {
entries.attributes.insert(GetGenericAttributeLocation(attr.first));
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
@@ -321,7 +321,8 @@ private:
}
void DeclareInputAttributes() {
for (const auto index : ir.GetInputAttributes()) {
for (const auto element : ir.GetInputAttributes()) {
const Attribute::Index index = element.first;
if (!IsGenericAttribute(index)) {
continue;
}
@@ -929,6 +930,11 @@ private:
return {};
}
/// Handler for the ImageStore operation. Not implemented in this decompiler yet: it reports
/// UNIMPLEMENTED and returns a default-constructed Id.
Id ImageStore(Operation operation) {
UNIMPLEMENTED();
return {};
}
Id Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(operation[0]);
UNIMPLEMENTED_IF(!target);
@@ -1281,6 +1287,8 @@ private:
&SPIRVDecompiler::TextureQueryLod,
&SPIRVDecompiler::TexelFetch,
&SPIRVDecompiler::ImageStore,
&SPIRVDecompiler::Branch,
&SPIRVDecompiler::PushFlowStack,
&SPIRVDecompiler::PopFlowStack,

View File

@@ -168,6 +168,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
{OpCode::Type::Texture, &ShaderIR::DecodeTexture},
{OpCode::Type::Image, &ShaderIR::DecodeImage},
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},

View File

@@ -4,7 +4,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -153,4 +152,4 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -4,7 +4,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"

View File

@@ -4,7 +4,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -48,4 +47,4 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -49,4 +49,4 @@ u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -93,4 +93,4 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation
}
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -46,4 +46,4 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -38,4 +38,4 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -56,4 +56,4 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -55,4 +55,4 @@ u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -53,4 +53,4 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -6,7 +6,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -65,4 +64,4 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -59,4 +59,4 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -0,0 +1,115 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
namespace {
/// Returns the number of coordinate components used to address an image of the given type.
/// Array types count the layer as an extra coordinate.
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
    using Tegra::Shader::ImageType;

    switch (image_type) {
    // One coordinate
    case ImageType::Texture1D:
    case ImageType::TextureBuffer:
        return 1;
    // Two coordinates
    case ImageType::Texture1DArray:
    case ImageType::Texture2D:
        return 2;
    // Three coordinates
    case ImageType::Texture2DArray:
    case ImageType::Texture3D:
        return 3;
    }

    // Only reached when none of the cases above matched the value.
    UNREACHABLE();
    return 1;
}
} // Anonymous namespace
/// Decodes the image instruction located at `pc` and appends the generated IR to `bb`.
/// Only SUST is handled: four consecutive registers starting at gpr0 hold the values to store
/// and the registers starting at gpr8 hold the coordinates.
/// @param bb Basic block that receives the generated nodes.
/// @param pc Index into program_code of the instruction to decode.
/// @return `pc`, unchanged.
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::SUST: {
        UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P);
        UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer);
        UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore);
        UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store

        // The component mask check above limits us to full RGBA stores, hence four values.
        constexpr std::size_t hardcoded_size{4};
        std::vector<Node> values;
        values.reserve(hardcoded_size);
        for (std::size_t i = 0; i < hardcoded_size; ++i) {
            values.push_back(GetRegister(instr.gpr0.Value() + i));
        }

        const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
        std::vector<Node> coords;
        coords.reserve(num_coords);
        for (std::size_t i = 0; i < num_coords; ++i) {
            coords.push_back(GetRegister(instr.gpr8.Value() + i));
        }

        // Immediate form indexes the image directly; otherwise the handle comes from gpr39.
        const auto type{instr.sust.image_type};
        const auto& image{instr.sust.is_immediate ? GetImage(instr.image, type)
                                                  : GetBindlessImage(instr.gpr39, type)};

        // The value list is not needed afterwards, so hand it over instead of copying it.
        MetaImage meta{image, std::move(values)};
        const Node store{Operation(OperationCode::ImageStore, std::move(meta), std::move(coords))};
        bb.push_back(store);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
    }

    return pc;
}
/// Returns the tracked entry for an image addressed by an immediate index, registering it in
/// used_images on first use.
/// @param image Instruction field whose index identifies the image.
/// @param type Image type expected by the instruction; asserted to match an existing entry.
/// @return Reference to the entry stored in used_images.
const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
    const auto offset{static_cast<std::size_t>(image.index.Value())};

    // If this image has already been used, return the existing mapping.
    // Bindless entries are skipped: their offset is a packed (cbuf index, cbuf offset) key that
    // can numerically coincide with an immediate index while naming a different image.
    const auto itr{std::find_if(used_images.begin(), used_images.end(),
                                [offset](const Image& entry) {
                                    return !entry.IsBindless() && entry.GetOffset() == offset;
                                })};
    if (itr != used_images.end()) {
        ASSERT(itr->GetType() == type);
        return *itr;
    }

    // Otherwise create a new mapping for this image.
    const std::size_t next_index{used_images.size()};
    const Image entry{offset, next_index, type};
    return *used_images.emplace(entry).first;
}
/// Returns the tracked entry for a bindless image whose handle is read from `reg`, registering
/// it in used_images on first use. The register is traced back through global_code to the
/// constant buffer read that produced it; that buffer's index and offset identify the image.
/// @param reg Register holding the image handle.
/// @param type Image type expected by the instruction; asserted to match an existing entry.
/// @return Reference to the entry stored in used_images.
const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
                                        Tegra::Shader::ImageType type) {
    const Node image_register{GetRegister(reg)};
    const Node base_image{
        TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};

    // TrackCbuf yields nullptr when the handle cannot be traced to a constant buffer, and
    // std::get_if yields nullptr for a null or mismatching node. Check both before dereferencing.
    const auto cbuf{std::get_if<CbufNode>(base_image)};
    ASSERT(cbuf != nullptr);
    const auto cbuf_offset_imm{std::get_if<ImmediateNode>(cbuf->GetOffset())};
    ASSERT(cbuf_offset_imm != nullptr);

    const auto cbuf_offset{cbuf_offset_imm->GetValue()};
    const auto cbuf_index{cbuf->GetIndex()};
    const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};

    // If this image has already been used, return the existing mapping.
    // Only bindless entries are considered: an immediate image's offset is a plain index that
    // can numerically coincide with the packed key while naming a different image.
    const auto itr{std::find_if(used_images.begin(), used_images.end(),
                                [cbuf_key](const Image& entry) {
                                    return entry.IsBindless() && entry.GetOffset() == cbuf_key;
                                })};
    if (itr != used_images.end()) {
        ASSERT(itr->GetType() == type);
        return *itr;
    }

    // Otherwise create a new mapping for this image.
    const std::size_t next_index{used_images.size()};
    const Image entry{cbuf_index, cbuf_offset, next_index, type};
    return *used_images.emplace(entry).first;
}
} // namespace VideoCommon::Shader

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -46,4 +47,4 @@ u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -50,4 +50,4 @@ u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -47,20 +47,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
"Indirect attribute loads are not supported");
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
"Non-32 bits PHYS reads are not implemented");
const Node buffer{GetRegister(instr.gpr39)};
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
Tegra::Shader::IpaSampleMode::Default};
u64 next_element = instr.attribute.fmt20.element;
auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
const auto LoadNextElement = [&](u32 reg_offset) {
const Node attribute{instr.attribute.fmt20.IsPhysical()
? GetPhysicalInputAttribute(instr.gpr8, buffer)
: GetInputAttribute(static_cast<Attribute::Index>(next_index),
next_element, buffer)};
const Node buffer = GetRegister(instr.gpr39);
const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
next_element, input_mode, buffer);
SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
@@ -146,25 +143,12 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
case OpCode::Id::LD:
case OpCode::Id::LDG: {
const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
switch (opcode->get().GetId()) {
case OpCode::Id::LD:
UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
return instr.generic.type;
case OpCode::Id::LDG:
return instr.ldg.type;
default:
UNREACHABLE();
return {};
}
}();
const auto [real_address_base, base_address, descriptor] =
TrackAndGetGlobalMemory(bb, instr, false);
TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
static_cast<u32>(instr.ldg.immediate_offset.Value()), false);
const u32 count = GetUniformTypeElementsCount(type);
const u32 count = GetUniformTypeElementsCount(instr.ldg.type);
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
@@ -178,6 +162,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
case OpCode::Id::STG: {
const auto [real_address_base, base_address, descriptor] =
TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
static_cast<u32>(instr.stg.immediate_offset.Value()), true);
// Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
SetTemporal(bb, 0, real_address_base);
const u32 count = GetUniformTypeElementsCount(instr.stg.type);
for (u32 i = 0; i < count; ++i) {
SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
}
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
}
break;
}
case OpCode::Id::ST_A: {
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
"Indirect attribute loads are not supported");
@@ -233,56 +239,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
case OpCode::Id::ST:
case OpCode::Id::STG: {
const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
switch (opcode->get().GetId()) {
case OpCode::Id::ST:
UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
return instr.generic.type;
case OpCode::Id::STG:
return instr.stg.type;
default:
UNREACHABLE();
return {};
}
}();
const auto [real_address_base, base_address, descriptor] =
TrackAndGetGlobalMemory(bb, instr, true);
// Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
SetTemporal(bb, 0, real_address_base);
const u32 count = GetUniformTypeElementsCount(type);
for (u32 i = 0; i < count; ++i) {
SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
}
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
}
break;
}
case OpCode::Id::AL2P: {
// Ignore al2p.direction since we don't care about it.
// Calculate emulation fake physical address.
const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
const Node reg{GetRegister(instr.gpr8)};
const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
// Set the fake address to target register.
SetRegister(bb, instr.gpr0, fake_address);
// Signal the shader IR to declare all possible attributes and varyings
uses_physical_attributes = true;
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
}
@@ -291,11 +247,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
Instruction instr,
Node addr_register,
u32 immediate_offset,
bool is_write) {
const auto addr_register{GetRegister(instr.gmem.gpr)};
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
const Node base_address{
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
const auto cbuf = std::get_if<CbufNode>(base_address);

View File

@@ -4,7 +4,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -131,18 +130,15 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::IPA: {
const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
const auto attribute = instr.attribute.fmt28;
const auto& attribute = instr.attribute.fmt28;
const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
instr.ipa.sample_mode.Value()};
Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
: GetInputAttribute(attribute.index, attribute.element);
const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
Node value = attr;
const Tegra::Shader::Attribute::Index index = attribute.index.Value();
const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
index <= Tegra::Shader::Attribute::Index::Attribute_31;
if (is_generic || is_physical) {
if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
index <= Tegra::Shader::Attribute::Index::Attribute_31) {
// TODO(Blinkhawk): There are cases where a perspective attribute uses PASS.
// In theory by setting them as perspective, OpenGL does the perspective correction.
// A way must be figured out to reverse the last step of it.

View File

@@ -64,4 +64,4 @@ u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -43,4 +43,4 @@ u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -48,4 +48,4 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -52,4 +52,4 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -244,6 +244,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
break;
}
case OpCode::Id::TLD: {
UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
if (instr.tld.nodep_flag) {
LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete");
}
WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
break;
}
case OpCode::Id::TLDS: {
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};
@@ -574,6 +586,38 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
return values;
}
/// Builds the TexelFetch operations for a TLD instruction, one per element of the returned
/// Node4. All of them share the same sampler, array register, coordinates and LOD.
Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
    const auto texture_type{instr.tld.texture_type};
    const bool is_array{instr.tld.is_array};
    const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
    const std::size_t coord_count{GetCoordCount(texture_type)};

    // Operands read from gpr8 onwards: the array index (when present), then the coordinates.
    u64 coord_reg{instr.gpr8.Value()};
    const Node array_register{is_array ? GetRegister(coord_reg++) : nullptr};

    std::vector<Node> coords;
    coords.reserve(coord_count);
    for (std::size_t fetched = 0; fetched != coord_count; ++fetched) {
        coords.push_back(GetRegister(coord_reg++));
    }

    // Operands read from gpr20 onwards. Only the LOD is consumed for now; the planned order
    // once the remaining features exist is: bindless handle, LOD, AOFFI, multisample.
    u64 extra_reg{instr.gpr20.Value()};
    const Node lod{lod_enabled ? GetRegister(extra_reg++) : Immediate(0u)};

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element};
        // Each operation takes ownership of its operand list, so pass a fresh copy every time.
        values[element] = Operation(OperationCode::TexelFetch, meta, std::vector<Node>(coords));
    }
    return values;
}
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
const std::size_t type_coord_count = GetCoordCount(texture_type);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;

View File

@@ -108,4 +108,4 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
}
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -21,13 +21,6 @@ using Tegra::Shader::PredCondition;
using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset)
: program_code{program_code}, main_offset{main_offset} {
Decode();
}
ShaderIR::~ShaderIR() = default;
Node ShaderIR::StoreNode(NodeData&& node_data) {
auto store = std::make_unique<NodeData>(node_data);
const Node node = store.get();
@@ -39,8 +32,8 @@ Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) {
return StoreNode(ConditionalNode(condition, std::move(code)));
}
Node ShaderIR::Comment(std::string text) {
return StoreNode(CommentNode(std::move(text)));
Node ShaderIR::Comment(const std::string& text) {
return StoreNode(CommentNode(text));
}
Node ShaderIR::Immediate(u32 value) {
@@ -96,14 +89,13 @@ Node ShaderIR::GetPredicate(bool immediate) {
return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
}
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
used_input_attributes.emplace(index);
return StoreNode(AbufNode(index, static_cast<u32>(element), buffer));
}
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element,
const Tegra::Shader::IpaMode& input_mode, Node buffer) {
const auto [entry, is_new] =
used_input_attributes.emplace(std::make_pair(index, std::set<Tegra::Shader::IpaMode>{}));
entry->second.insert(input_mode);
Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
uses_physical_attributes = true;
return StoreNode(AbufNode(GetRegister(physical_address), buffer));
return StoreNode(AbufNode(index, static_cast<u32>(element), input_mode, buffer));
}
Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {

View File

@@ -172,6 +172,8 @@ enum class OperationCode {
TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
TexelFetch, /// (MetaTexture, int[N], int) -> float4
ImageStore, /// (MetaImage, float[N] coords) -> void
Branch, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void
PopFlowStack, /// () -> void
@@ -267,6 +269,48 @@ private:
bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
};
/// Describes an image used by a shader: where it comes from (offset), the slot assigned to it
/// (index), its type and whether it is addressed through a bindless handle.
class Image {
public:
    /// Creates a non-bindless image identified directly by `offset`.
    explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type)
        : Image(offset, index, type, false) {}

    /// Creates a bindless image. The constant buffer index goes in the upper 32 bits of the
    /// stored offset and the constant buffer offset in the lower 32 bits.
    explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
                   Tegra::Shader::ImageType type)
        : Image(static_cast<std::size_t>((static_cast<u64>(cbuf_index) << 32) | cbuf_offset),
                index, type, true) {}

    /// Creates an image with every field given explicitly.
    explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
                   bool is_bindless)
        : offset{offset}, index{index}, type{type}, is_bindless{is_bindless} {}

    /// Offset identifying the image (packed constant buffer key for bindless images).
    std::size_t GetOffset() const {
        return offset;
    }

    /// Slot assigned to this image.
    std::size_t GetIndex() const {
        return index;
    }

    /// Type of the image.
    Tegra::Shader::ImageType GetType() const {
        return type;
    }

    /// Whether the image was created through the bindless constructor or flag.
    bool IsBindless() const {
        return is_bindless;
    }

    /// Lexicographic ordering over (offset, index, type, is_bindless), so that images can be
    /// stored in ordered containers.
    bool operator<(const Image& rhs) const {
        const auto lhs_key = std::tie(offset, index, type, is_bindless);
        const auto rhs_key = std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless);
        return lhs_key < rhs_key;
    }

private:
    std::size_t offset{};
    std::size_t index{};
    Tegra::Shader::ImageType type{};
    bool is_bindless{};
};
class ConstBuffer {
public:
explicit ConstBuffer(u32 max_offset, bool is_indirect)
@@ -328,31 +372,45 @@ struct MetaTexture {
u32 element{};
};
constexpr MetaArithmetic PRECISE = {true};
constexpr MetaArithmetic NO_PRECISE = {false};
/// Metadata attached to image operations such as OperationCode::ImageStore.
struct MetaImage {
// Image the operation targets. Held by reference, so the referenced Image must outlive this
// object.
const Image& image;
// Nodes providing the values handled by the operation (for ImageStore, the values to store).
std::vector<Node> values;
};
using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
inline constexpr MetaArithmetic PRECISE = {true};
inline constexpr MetaArithmetic NO_PRECISE = {false};
using Meta = std::variant<MetaArithmetic, MetaTexture, MetaImage, Tegra::Shader::HalfType>;
/// Holds any kind of operation that can be done in the IR
class OperationNode final {
public:
explicit OperationNode(OperationCode code) : code{code} {}
explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {}
template <typename... T>
explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {}
template <typename... T>
explicit OperationNode(OperationCode code, const T*... operands)
explicit constexpr OperationNode(OperationCode code, Meta&& meta)
: code{code}, meta{std::move(meta)} {}
template <typename... T>
explicit constexpr OperationNode(OperationCode code, const T*... operands)
: OperationNode(code, {}, operands...) {}
template <typename... T>
explicit OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
: code{code}, meta{std::move(meta)}, operands{operands_...} {}
explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
: code{code}, meta{std::move(meta)} {
auto operands_list = {operands_...};
for (auto& operand : operands_list) {
operands.push_back(operand);
}
}
explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands)
: code{code}, meta{meta}, operands{std::move(operands)} {}
explicit OperationNode(OperationCode code, std::vector<Node>&& operands)
: code{code}, operands{std::move(operands)} {}
: code{code}, meta{}, operands{std::move(operands)} {}
OperationCode GetCode() const {
return code;
@@ -456,14 +514,17 @@ private:
/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
class AbufNode final {
public:
// Initialize for standard attributes (index is explicit).
explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
const Tegra::Shader::IpaMode& input_mode, Node buffer = {})
: input_mode{input_mode}, buffer{buffer}, index{index}, element{element} {}
explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
Node buffer = {})
: buffer{buffer}, index{index}, element{element} {}
: input_mode{}, buffer{buffer}, index{index}, element{element} {}
// Initialize for physical attributes (index is a variable value).
explicit constexpr AbufNode(Node physical_address, Node buffer = {})
: physical_address{physical_address}, buffer{buffer} {}
Tegra::Shader::IpaMode GetInputMode() const {
return input_mode;
}
Tegra::Shader::Attribute::Index GetIndex() const {
return index;
@@ -477,19 +538,11 @@ public:
return buffer;
}
bool IsPhysicalBuffer() const {
return physical_address != nullptr;
}
Node GetPhysicalAddress() const {
return physical_address;
}
private:
Node physical_address{};
Node buffer{};
Tegra::Shader::Attribute::Index index{};
u32 element{};
const Tegra::Shader::IpaMode input_mode;
const Node buffer;
const Tegra::Shader::Attribute::Index index;
const u32 element;
};
/// Constant buffer node, usually mapped to uniform buffers in GLSL
@@ -563,8 +616,11 @@ private:
class ShaderIR final {
public:
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset);
~ShaderIR();
/// Initializes program_code and main_offset from the arguments and immediately runs Decode().
/// @param program_code Shader program code to decode.
/// @param main_offset Offset passed through to the decoder (stored in main_offset).
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset)
: program_code{program_code}, main_offset{main_offset} {
Decode();
}
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
return basic_blocks;
@@ -578,7 +634,8 @@ public:
return used_predicates;
}
const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const {
const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>&
GetInputAttributes() const {
return used_input_attributes;
}
@@ -594,6 +651,10 @@ public:
return used_samplers;
}
/// Returns the set of images registered while decoding (see used_images).
const std::set<Image>& GetImages() const {
return used_images;
}
const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
const {
return used_clip_distances;
@@ -607,10 +668,6 @@ public:
return static_cast<std::size_t>(coverage_end * sizeof(u64));
}
bool HasPhysicalAttributes() const {
return uses_physical_attributes;
}
const Tegra::Shader::Header& GetHeader() const {
return header;
}
@@ -644,6 +701,7 @@ private:
u32 DecodeConversion(NodeBlock& bb, u32 pc);
u32 DecodeMemory(NodeBlock& bb, u32 pc);
u32 DecodeTexture(NodeBlock& bb, u32 pc);
u32 DecodeImage(NodeBlock& bb, u32 pc);
u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
@@ -663,7 +721,7 @@ private:
/// Creates a conditional node
Node Conditional(Node condition, std::vector<Node>&& code);
/// Creates a commentary
Node Comment(std::string text);
Node Comment(const std::string& text);
/// Creates an u32 immediate
Node Immediate(u32 value);
/// Creates a s32 immediate
@@ -692,9 +750,8 @@ private:
/// Generates a predicate node for an immediate true or false value
Node GetPredicate(bool immediate);
/// Generates a node representing an input attribute. Keeps track of used attributes.
Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {});
/// Generates a node representing a physical input attribute.
Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {});
Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element,
const Tegra::Shader::IpaMode& input_mode, Node buffer = {});
/// Generates a node representing an output attribute. Keeps track of used attributes.
Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
/// Generates a node representing an internal flag
@@ -764,6 +821,12 @@ private:
Tegra::Shader::TextureType type, bool is_array,
bool is_shadow);
/// Accesses an image.
const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
/// Access a bindless image sampler.
const Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -787,6 +850,8 @@ private:
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
bool depth_compare, bool is_array, bool is_aoffi);
Node4 GetTldCode(Tegra::Shader::Instruction instr);
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
bool is_array);
@@ -811,15 +876,16 @@ private:
void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
Node op_c, Node imm_lut, bool sets_cc);
Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
s64 cursor) const;
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(
NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write);
std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
Node addr_register,
u32 immediate_offset,
bool is_write);
template <typename... T>
Node Operation(OperationCode code, const T*... operands) {
@@ -831,10 +897,12 @@ private:
return StoreNode(OperationNode(code, std::move(meta), operands...));
}
template <typename... T>
Node Operation(OperationCode code, std::vector<Node>&& operands) {
return StoreNode(OperationNode(code, std::move(operands)));
}
template <typename... T>
Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) {
return StoreNode(OperationNode(code, std::move(meta), std::move(operands)));
}
@@ -866,13 +934,14 @@ private:
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;
std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>
used_input_attributes;
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
std::map<u32, ConstBuffer> used_cbufs;
std::set<Sampler> used_samplers;
std::set<Image> used_images;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
Tegra::Shader::Header header;
};

View File

@@ -17,24 +17,22 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
for (; cursor >= 0; --cursor) {
const Node node = code.at(cursor);
if (const auto operation = std::get_if<OperationNode>(node)) {
if (operation->GetCode() == operation_code) {
if (operation->GetCode() == operation_code)
return {node, cursor};
}
}
if (const auto conditional = std::get_if<ConditionalNode>(node)) {
const auto& conditional_code = conditional->GetCode();
const auto [found, internal_cursor] = FindOperation(
conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
if (found) {
if (found)
return {found, cursor};
}
}
}
return {};
}
} // namespace
Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const {
Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
// Cbuf found, but it has to be immediate
return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
@@ -67,7 +65,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const
return nullptr;
}
std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
// Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
// that it uses as operand
const auto [found, found_cursor] =
@@ -82,7 +80,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code,
}
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
s64 cursor) const {
s64 cursor) {
for (; cursor >= 0; --cursor) {
const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
if (!found_node) {

View File

@@ -12,6 +12,8 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
switch (texture_type) {
case Tegra::Texture::TextureType::Texture1D:
return SurfaceTarget::Texture1D;
case Tegra::Texture::TextureType::Texture1DBuffer:
return SurfaceTarget::TextureBuffer;
case Tegra::Texture::TextureType::Texture2D:
case Tegra::Texture::TextureType::Texture2DNoMipmap:
return SurfaceTarget::Texture2D;
@@ -35,6 +37,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
bool SurfaceTargetIsLayered(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::TextureBuffer:
case SurfaceTarget::Texture2D:
case SurfaceTarget::Texture3D:
return false;
@@ -53,6 +56,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) {
bool SurfaceTargetIsArray(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::TextureBuffer:
case SurfaceTarget::Texture2D:
case SurfaceTarget::Texture3D:
case SurfaceTarget::TextureCubemap:

View File

@@ -114,6 +114,7 @@ enum class SurfaceType {
enum class SurfaceTarget {
Texture1D,
TextureBuffer,
Texture2D,
Texture3D,
Texture1DArray,

View File

@@ -172,12 +172,16 @@ struct TICEntry {
BitField<26, 1, u32> use_header_opt_control;
BitField<27, 1, u32> depth_texture;
BitField<28, 4, u32> max_mip_level;
BitField<0, 16, u32> buffer_high_width_minus_one;
};
union {
BitField<0, 16, u32> width_minus_1;
BitField<22, 1, u32> srgb_conversion;
BitField<23, 4, TextureType> texture_type;
BitField<29, 3, u32> border_size;
BitField<0, 16, u32> buffer_low_width_minus_one;
};
union {
BitField<0, 16, u32> height_minus_1;
@@ -206,7 +210,10 @@ struct TICEntry {
}
u32 Width() const {
return width_minus_1 + 1;
if (header_version != TICHeaderVersion::OneDBuffer) {
return width_minus_1 + 1;
}
return (buffer_high_width_minus_one << 16) | buffer_low_width_minus_one;
}
u32 Height() const {
@@ -240,6 +247,15 @@ struct TICEntry {
header_version == TICHeaderVersion::BlockLinearColorKey;
}
bool IsLineal() const {
return header_version == TICHeaderVersion::Pitch ||
header_version == TICHeaderVersion::PitchColorKey;
}
bool IsBuffer() const {
return header_version == TICHeaderVersion::OneDBuffer;
}
bool IsSrgbConversionEnabled() const {
return srgb_conversion != 0;
}

View File

@@ -82,6 +82,8 @@ add_executable(yuzu
util/limitable_input_dialog.h
util/sequence_dialog/sequence_dialog.cpp
util/sequence_dialog/sequence_dialog.h
util/spinbox.cpp
util/spinbox.h
util/util.cpp
util/util.h
compatdb.cpp

View File

@@ -9,10 +9,10 @@
AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) {
ui->setupUi(this);
ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200));
ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg(
QString::fromUtf8(Common::g_build_fullname), QString::fromUtf8(Common::g_scm_branch),
QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10)));
ui->labelLogo->setPixmap(QIcon::fromTheme("yuzu").pixmap(200));
ui->labelBuildInfo->setText(
ui->labelBuildInfo->text().arg(Common::g_build_fullname, Common::g_scm_branch,
Common::g_scm_desc, QString(Common::g_build_date).left(10)));
}
AboutDialog::~AboutDialog() = default;

View File

@@ -54,6 +54,6 @@ void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_te
void QtErrorDisplay::MainWindowFinishedError() {
// Acquire the HLE mutex
std::lock_guard lock{HLE::g_hle_lock};
std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
callback();
}

View File

@@ -84,10 +84,10 @@ QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent)
tree_view->setContextMenuPolicy(Qt::NoContextMenu);
item_model->insertColumns(0, 1);
item_model->setHeaderData(0, Qt::Horizontal, tr("Users"));
item_model->setHeaderData(0, Qt::Horizontal, "Users");
// We must register all custom types with the Qt Automoc system so that we are able to use it
// with signals/slots. In this case, QList falls under the umbrella of custom types.
// with signals/slots. In this case, QList falls under the umbrells of custom types.
qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>");
layout->setContentsMargins(0, 0, 0, 0);

View File

@@ -188,9 +188,7 @@ private:
GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread)
: QWidget(parent), emu_thread(emu_thread) {
setWindowTitle(QStringLiteral("yuzu %1 | %2-%3")
.arg(QString::fromUtf8(Common::g_build_name),
QString::fromUtf8(Common::g_scm_branch),
QString::fromUtf8(Common::g_scm_desc)));
.arg(Common::g_build_name, Common::g_scm_branch, Common::g_scm_desc));
setAttribute(Qt::WA_AcceptTouchEvents);
InputCommon::Init();
@@ -219,7 +217,7 @@ void GRenderWindow::SwapBuffers() {
// However:
// - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called
// since the last time `swapBuffers` was executed;
// - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks.
// - On macOS, if `makeCurrent` isn't called explicitely, resizing the buffer breaks.
context->makeCurrent(child);
context->swapBuffers(child);
@@ -381,7 +379,6 @@ void GRenderWindow::InitRenderTarget() {
fmt.setVersion(4, 3);
if (Settings::values.use_compatibility_profile) {
fmt.setProfile(QSurfaceFormat::CompatibilityProfile);
fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
} else {
fmt.setProfile(QSurfaceFormat::CoreProfile);
}

File diff suppressed because it is too large Load Diff

View File

@@ -9,6 +9,7 @@
#include <string>
#include <QVariant>
#include "core/settings.h"
#include "yuzu/ui_settings.h"
class QSettings;
@@ -36,51 +37,19 @@ private:
void ReadTouchscreenValues();
void ApplyDefaultProfileIfInputInvalid();
// Read functions bases off the respective config section names.
void ReadAudioValues();
void ReadControlValues();
void ReadCoreValues();
void ReadDataStorageValues();
void ReadDebuggingValues();
void ReadDisabledAddOnValues();
void ReadMiscellaneousValues();
void ReadPathValues();
void ReadRendererValues();
void ReadShortcutValues();
void ReadSystemValues();
void ReadUIValues();
void ReadUIGamelistValues();
void ReadUILayoutValues();
void ReadWebServiceValues();
void SaveValues();
void SavePlayerValues();
void SaveDebugValues();
void SaveMouseValues();
void SaveTouchscreenValues();
// Save functions based off the respective config section names.
void SaveAudioValues();
void SaveControlValues();
void SaveCoreValues();
void SaveDataStorageValues();
void SaveDebuggingValues();
void SaveDisabledAddOnValues();
void SaveMiscellaneousValues();
void SavePathValues();
void SaveRendererValues();
void SaveShortcutValues();
void SaveSystemValues();
void SaveUIValues();
void SaveUIGamelistValues();
void SaveUILayoutValues();
void SaveWebServiceValues();
QVariant ReadSetting(const QString& name) const;
QVariant ReadSetting(const QString& name, const QVariant& default_value) const;
void WriteSetting(const QString& name, const QVariant& value);
void WriteSetting(const QString& name, const QVariant& value, const QVariant& default_value);
static const std::array<UISettings::Shortcut, 15> default_hotkeys;
std::unique_ptr<QSettings> qt_config;
std::string qt_config_loc;
};

View File

@@ -16,21 +16,21 @@ ConfigureAudio::ConfigureAudio(QWidget* parent)
ui->setupUi(this);
ui->output_sink_combo_box->clear();
ui->output_sink_combo_box->addItem(QString::fromUtf8(AudioCore::auto_device_name));
ui->output_sink_combo_box->addItem("auto");
for (const char* id : AudioCore::GetSinkIDs()) {
ui->output_sink_combo_box->addItem(QString::fromUtf8(id));
ui->output_sink_combo_box->addItem(id);
}
connect(ui->volume_slider, &QSlider::valueChanged, this,
&ConfigureAudio::setVolumeIndicatorText);
this->setConfiguration();
connect(ui->output_sink_combo_box, qOverload<int>(&QComboBox::currentIndexChanged), this,
connect(ui->output_sink_combo_box,
static_cast<void (QComboBox::*)(int)>(&QComboBox::currentIndexChanged), this,
&ConfigureAudio::updateAudioDevices);
const bool is_powered_on = Core::System::GetInstance().IsPoweredOn();
ui->output_sink_combo_box->setEnabled(!is_powered_on);
ui->audio_device_combo_box->setEnabled(!is_powered_on);
ui->output_sink_combo_box->setEnabled(!Core::System::GetInstance().IsPoweredOn());
ui->audio_device_combo_box->setEnabled(!Core::System::GetInstance().IsPoweredOn());
}
ConfigureAudio::~ConfigureAudio() = default;
@@ -94,7 +94,7 @@ void ConfigureAudio::applyConfiguration() {
void ConfigureAudio::updateAudioDevices(int sink_index) {
ui->audio_device_combo_box->clear();
ui->audio_device_combo_box->addItem(QString::fromUtf8(AudioCore::auto_device_name));
ui->audio_device_combo_box->addItem(AudioCore::auto_device_name);
const std::string sink_id = ui->output_sink_combo_box->itemText(sink_index).toStdString();
for (const auto& device : AudioCore::GetDeviceListForSink(sink_id)) {

View File

@@ -100,15 +100,13 @@ void ConfigureGameList::RetranslateUI() {
void ConfigureGameList::InitializeIconSizeComboBox() {
for (const auto& size : default_icon_sizes) {
ui->icon_size_combobox->addItem(QString::fromUtf8(size.second), size.first);
ui->icon_size_combobox->addItem(size.second, size.first);
}
}
void ConfigureGameList::InitializeRowComboBoxes() {
for (std::size_t i = 0; i < row_text_names.size(); ++i) {
const QString row_text_name = QString::fromUtf8(row_text_names[i]);
ui->row_1_text_combobox->addItem(row_text_name, QVariant::fromValue(i));
ui->row_2_text_combobox->addItem(row_text_name, QVariant::fromValue(i));
ui->row_1_text_combobox->addItem(row_text_names[i], QVariant::fromValue(i));
ui->row_2_text_combobox->addItem(row_text_names[i], QVariant::fromValue(i));
}
}

View File

@@ -14,8 +14,7 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
ui->setupUi(this);
for (const auto& theme : UISettings::themes) {
ui->theme_combobox->addItem(QString::fromUtf8(theme.first),
QString::fromUtf8(theme.second));
ui->theme_combobox->addItem(theme.first, theme.second);
}
this->setConfiguration();

Some files were not shown because too many files have changed in this diff Show More