Compare commits

..

9 Commits

Author SHA1 Message Date
ReinUsesLisp
9a8c1745f1 gl_shader_decompiler: Implement image binding settings 2019-05-16 20:03:51 -03:00
ReinUsesLisp
f96d50165f shader: Implement bindless images 2019-05-16 20:03:51 -03:00
ReinUsesLisp
f9f541470e shader: Decode SUST and implement backing image functionality 2019-05-16 20:03:51 -03:00
ReinUsesLisp
ce691745dc gl_rasterizer: Track texture buffer usage 2019-05-16 20:03:51 -03:00
ReinUsesLisp
1d59af8f7c video_core: Make ARB_buffer_storage a required extension 2019-05-16 20:03:50 -03:00
ReinUsesLisp
a6252257eb gl_rasterizer_cache: Use texture buffers to emulate texture buffers 2019-05-16 20:03:50 -03:00
ReinUsesLisp
dc5e5ac3b0 maxwell_3d: Partially implement texture buffers as 1D textures 2019-05-16 18:55:20 -03:00
ReinUsesLisp
4f612052b2 gl_shader_decompiler: Allow 1D textures to be texture buffers 2019-05-16 18:55:20 -03:00
ReinUsesLisp
89eef17670 shader: Implement texture buffers 2019-05-16 18:55:20 -03:00
155 changed files with 3595 additions and 5961 deletions

View File

@@ -132,7 +132,7 @@ find_package(Threads REQUIRED)
if (ENABLE_SDL2)
if (YUZU_USE_BUNDLED_SDL2)
# Detect toolchain and platform
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
set(SDL2_VER "SDL2-2.0.8")
else()
message(FATAL_ERROR "No bundled SDL2 binaries for your toolchain. Disable YUZU_USE_BUNDLED_SDL2 and provide your own.")
@@ -165,7 +165,7 @@ if (YUZU_USE_BUNDLED_UNICORN)
if (MSVC)
message(STATUS "unicorn not found, falling back to bundled")
# Detect toolchain and platform
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
set(UNICORN_VER "unicorn-yuzu")
else()
message(FATAL_ERROR "No bundled Unicorn binaries for your toolchain. Disable YUZU_USE_BUNDLED_UNICORN and provide your own.")
@@ -233,7 +233,7 @@ endif()
if (ENABLE_QT)
if (YUZU_USE_BUNDLED_QT)
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
set(QT_VER qt-5.12.0-msvc2017_64)
else()
message(FATAL_ERROR "No bundled Qt binaries for your toolchain. Disable YUZU_USE_BUNDLED_QT and provide your own.")

View File

@@ -70,6 +70,7 @@ set(HASH_FILES
"${VIDEO_CORE}/shader/decode/half_set.cpp"
"${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/hfma2.cpp"
"${VIDEO_CORE}/shader/decode/image.cpp"
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"

View File

@@ -90,20 +90,12 @@
* int arg2) KHRONOS_APIATTRIBUTES;
*/
#if defined(__SCITECH_SNAP__) && !defined(KHRONOS_STATIC)
# define KHRONOS_STATIC 1
#endif
/*-------------------------------------------------------------------------
* Definition of KHRONOS_APICALL
*-------------------------------------------------------------------------
* This precedes the return type of the function in the function prototype.
*/
#if defined(KHRONOS_STATIC)
/* If the preprocessor constant KHRONOS_STATIC is defined, make the
* header compatible with static linking. */
# define KHRONOS_APICALL
#elif defined(_WIN32)
#if defined(_WIN32) && !defined(__SCITECH_SNAP__)
# define KHRONOS_APICALL __declspec(dllimport)
#elif defined (__SYMBIAN32__)
# define KHRONOS_APICALL IMPORT_C
@@ -119,7 +111,7 @@
* This follows the return type of the function and precedes the function
* name in the function prototype.
*/
#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(KHRONOS_STATIC)
#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__)
/* Win32 but not WinCE */
# define KHRONOS_APIENTRY __stdcall
#else

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -44,6 +44,7 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/decode/half_set.cpp"
"${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/hfma2.cpp"
"${VIDEO_CORE}/shader/decode/image.cpp"
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"

View File

@@ -78,17 +78,16 @@ namespace FileUtil {
// Remove any ending forward slashes from directory paths
// Modifies argument.
static void StripTailDirSlashes(std::string& fname) {
if (fname.length() <= 1) {
return;
if (fname.length() > 1) {
std::size_t i = fname.length();
while (i > 0 && fname[i - 1] == DIR_SEP_CHR)
--i;
fname.resize(i);
}
std::size_t i = fname.length();
while (i > 0 && fname[i - 1] == DIR_SEP_CHR) {
--i;
}
fname.resize(i);
return;
}
// Returns true if file filename exists
bool Exists(const std::string& filename) {
struct stat file_info;
@@ -108,6 +107,7 @@ bool Exists(const std::string& filename) {
return (result == 0);
}
// Returns true if filename is a directory
bool IsDirectory(const std::string& filename) {
struct stat file_info;
@@ -132,6 +132,8 @@ bool IsDirectory(const std::string& filename) {
return S_ISDIR(file_info.st_mode);
}
// Deletes a given filename, return true on success
// Doesn't supports deleting a directory
bool Delete(const std::string& filename) {
LOG_TRACE(Common_Filesystem, "file {}", filename);
@@ -163,6 +165,7 @@ bool Delete(const std::string& filename) {
return true;
}
// Returns true if successful, or path already exists.
bool CreateDir(const std::string& path) {
LOG_TRACE(Common_Filesystem, "directory {}", path);
#ifdef _WIN32
@@ -191,6 +194,7 @@ bool CreateDir(const std::string& path) {
#endif
}
// Creates the full path of fullPath returns true on success
bool CreateFullPath(const std::string& fullPath) {
int panicCounter = 100;
LOG_TRACE(Common_Filesystem, "path {}", fullPath);
@@ -226,6 +230,7 @@ bool CreateFullPath(const std::string& fullPath) {
}
}
// Deletes a directory filename, returns true on success
bool DeleteDir(const std::string& filename) {
LOG_TRACE(Common_Filesystem, "directory {}", filename);
@@ -247,6 +252,7 @@ bool DeleteDir(const std::string& filename) {
return false;
}
// renames file srcFilename to destFilename, returns true on success
bool Rename(const std::string& srcFilename, const std::string& destFilename) {
LOG_TRACE(Common_Filesystem, "{} --> {}", srcFilename, destFilename);
#ifdef _WIN32
@@ -262,6 +268,7 @@ bool Rename(const std::string& srcFilename, const std::string& destFilename) {
return false;
}
// copies file srcFilename to destFilename, returns true on success
bool Copy(const std::string& srcFilename, const std::string& destFilename) {
LOG_TRACE(Common_Filesystem, "{} --> {}", srcFilename, destFilename);
#ifdef _WIN32
@@ -317,6 +324,7 @@ bool Copy(const std::string& srcFilename, const std::string& destFilename) {
#endif
}
// Returns the size of filename (64bit)
u64 GetSize(const std::string& filename) {
if (!Exists(filename)) {
LOG_ERROR(Common_Filesystem, "failed {}: No such file", filename);
@@ -343,6 +351,7 @@ u64 GetSize(const std::string& filename) {
return 0;
}
// Overloaded GetSize, accepts file descriptor
u64 GetSize(const int fd) {
struct stat buf;
if (fstat(fd, &buf) != 0) {
@@ -352,6 +361,7 @@ u64 GetSize(const int fd) {
return buf.st_size;
}
// Overloaded GetSize, accepts FILE*
u64 GetSize(FILE* f) {
// can't use off_t here because it can be 32-bit
u64 pos = ftello(f);
@@ -367,6 +377,7 @@ u64 GetSize(FILE* f) {
return size;
}
// creates an empty file filename, returns true on success
bool CreateEmptyFile(const std::string& filename) {
LOG_TRACE(Common_Filesystem, "{}", filename);
@@ -491,6 +502,7 @@ bool DeleteDirRecursively(const std::string& directory, unsigned int recursion)
return true;
}
// Create directory and copy contents (does not overwrite existing files)
void CopyDir(const std::string& source_path, const std::string& dest_path) {
#ifndef _WIN32
if (source_path == dest_path)
@@ -527,7 +539,8 @@ void CopyDir(const std::string& source_path, const std::string& dest_path) {
#endif
}
std::optional<std::string> GetCurrentDir() {
// Returns the current directory
std::string GetCurrentDir() {
// Get the current working directory (getcwd uses malloc)
#ifdef _WIN32
wchar_t* dir;
@@ -537,7 +550,7 @@ std::optional<std::string> GetCurrentDir() {
if (!(dir = getcwd(nullptr, 0))) {
#endif
LOG_ERROR(Common_Filesystem, "GetCurrentDirectory failed: {}", GetLastErrorMsg());
return {};
return nullptr;
}
#ifdef _WIN32
std::string strDir = Common::UTF16ToUTF8(dir);
@@ -548,6 +561,7 @@ std::optional<std::string> GetCurrentDir() {
return strDir;
}
// Sets the current directory to the given directory
bool SetCurrentDir(const std::string& directory) {
#ifdef _WIN32
return _wchdir(Common::UTF8ToUTF16W(directory).c_str()) == 0;
@@ -659,6 +673,8 @@ std::string GetSysDirectory() {
return sysDir;
}
// Returns a string with a yuzu data dir or file in the user's home
// directory. To be used in "multi-user" mode (that is, installed).
const std::string& GetUserPath(UserPath path, const std::string& new_path) {
static std::unordered_map<UserPath, std::string> paths;
auto& user_path = paths[UserPath::UserDir];
@@ -746,11 +762,11 @@ std::string GetNANDRegistrationDir(bool system) {
return GetUserPath(UserPath::NANDDir) + "user/Contents/registered/";
}
std::size_t WriteStringToFile(bool text_file, const std::string& filename, std::string_view str) {
return IOFile(filename, text_file ? "w" : "wb").WriteString(str);
std::size_t WriteStringToFile(bool text_file, const std::string& str, const char* filename) {
return FileUtil::IOFile(filename, text_file ? "w" : "wb").WriteBytes(str.data(), str.size());
}
std::size_t ReadFileToString(bool text_file, const std::string& filename, std::string& str) {
std::size_t ReadFileToString(bool text_file, const char* filename, std::string& str) {
IOFile file(filename, text_file ? "r" : "rb");
if (!file.IsOpen())
@@ -760,6 +776,13 @@ std::size_t ReadFileToString(bool text_file, const std::string& filename, std::s
return file.ReadArray(&str[0], str.size());
}
/**
* Splits the filename into 8.3 format
* Loosely implemented following https://en.wikipedia.org/wiki/8.3_filename
* @param filename The normal filename to use
* @param short_name A 9-char array in which the short name will be written
* @param extension A 4-char array in which the extension will be written
*/
void SplitFilename83(const std::string& filename, std::array<char, 9>& short_name,
std::array<char, 4>& extension) {
const std::string forbidden_characters = ".\"/\\[]:;=, ";

View File

@@ -9,7 +9,6 @@
#include <fstream>
#include <functional>
#include <limits>
#include <optional>
#include <string>
#include <string_view>
#include <type_traits>
@@ -119,7 +118,7 @@ u64 ScanDirectoryTree(const std::string& directory, FSTEntry& parent_entry,
bool DeleteDirRecursively(const std::string& directory, unsigned int recursion = 256);
// Returns the current directory
std::optional<std::string> GetCurrentDir();
std::string GetCurrentDir();
// Create directory and copy contents (does not overwrite existing files)
void CopyDir(const std::string& source_path, const std::string& dest_path);
@@ -147,9 +146,9 @@ const std::string& GetExeDirectory();
std::string AppDataRoamingDirectory();
#endif
std::size_t WriteStringToFile(bool text_file, const std::string& filename, std::string_view str);
std::size_t WriteStringToFile(bool text_file, const std::string& str, const char* filename);
std::size_t ReadFileToString(bool text_file, const std::string& filename, std::string& str);
std::size_t ReadFileToString(bool text_file, const char* filename, std::string& str);
/**
* Splits the filename into 8.3 format
@@ -258,8 +257,8 @@ public:
return WriteArray(&object, 1);
}
std::size_t WriteString(std::string_view str) {
return WriteArray(str.data(), str.length());
std::size_t WriteString(const std::string& str) {
return WriteArray(str.c_str(), str.length());
}
bool IsOpen() const {
@@ -287,8 +286,8 @@ private:
template <typename T>
void OpenFStream(T& fstream, const std::string& filename, std::ios_base::openmode openmode) {
#ifdef _MSC_VER
fstream.open(Common::UTF8ToUTF16W(filename), openmode);
fstream.open(Common::UTF8ToUTF16W(filename).c_str(), openmode);
#else
fstream.open(filename, openmode);
fstream.open(filename.c_str(), openmode);
#endif
}

View File

@@ -14,11 +14,11 @@ namespace Core::Timing {
constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE;
s64 usToCycles(s64 us) {
if (static_cast<u64>(us / 1000000) > MAX_VALUE_TO_MULTIPLY) {
if (us / 1000000 > MAX_VALUE_TO_MULTIPLY) {
LOG_ERROR(Core_Timing, "Integer overflow, use max value");
return std::numeric_limits<s64>::max();
}
if (static_cast<u64>(us) > MAX_VALUE_TO_MULTIPLY) {
if (us > MAX_VALUE_TO_MULTIPLY) {
LOG_DEBUG(Core_Timing, "Time very big, do rounding");
return BASE_CLOCK_RATE * (us / 1000000);
}
@@ -38,11 +38,11 @@ s64 usToCycles(u64 us) {
}
s64 nsToCycles(s64 ns) {
if (static_cast<u64>(ns / 1000000000) > MAX_VALUE_TO_MULTIPLY) {
if (ns / 1000000000 > MAX_VALUE_TO_MULTIPLY) {
LOG_ERROR(Core_Timing, "Integer overflow, use max value");
return std::numeric_limits<s64>::max();
}
if (static_cast<u64>(ns) > MAX_VALUE_TO_MULTIPLY) {
if (ns > MAX_VALUE_TO_MULTIPLY) {
LOG_DEBUG(Core_Timing, "Time very big, do rounding");
return BASE_CLOCK_RATE * (ns / 1000000000);
}

View File

@@ -169,7 +169,8 @@ private:
* For the request to be honored, EmuWindow implementations will usually reimplement this
* function.
*/
virtual void OnMinimalClientAreaChangeRequest(std::pair<unsigned, unsigned>) {
virtual void OnMinimalClientAreaChangeRequest(
const std::pair<unsigned, unsigned>& minimal_size) {
// By default, ignore this request and do nothing.
}

View File

@@ -438,7 +438,7 @@ inline float RequestParser::Pop() {
template <>
inline double RequestParser::Pop() {
const u64 value = Pop<u64>();
double real;
float real;
std::memcpy(&real, &value, sizeof(real));
return real;
}

View File

@@ -43,7 +43,7 @@ void SessionRequestHandler::ClientDisconnected(const SharedPtr<ServerSession>& s
}
SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
const std::string& reason, u64 timeout, WakeupCallback&& callback,
SharedPtr<Thread> thread, const std::string& reason, u64 timeout, WakeupCallback&& callback,
SharedPtr<WritableEvent> writable_event) {
// Put the client thread to sleep until the wait event is signaled or the timeout expires.
thread->SetWakeupCallback([context = *this, callback](
@@ -58,7 +58,7 @@ SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
auto& kernel = Core::System::GetInstance().Kernel();
if (!writable_event) {
// Create event if not provided
const auto pair = WritableEvent::CreateEventPair(kernel, ResetType::Automatic,
const auto pair = WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"HLE Pause Event: " + reason);
writable_event = pair.writable;
}
@@ -76,9 +76,8 @@ SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
return writable_event;
}
HLERequestContext::HLERequestContext(SharedPtr<Kernel::ServerSession> server_session,
SharedPtr<Thread> thread)
: server_session(std::move(server_session)), thread(std::move(thread)) {
HLERequestContext::HLERequestContext(SharedPtr<Kernel::ServerSession> server_session)
: server_session(std::move(server_session)) {
cmd_buf[0] = 0;
}

View File

@@ -97,7 +97,7 @@ protected:
*/
class HLERequestContext {
public:
explicit HLERequestContext(SharedPtr<ServerSession> session, SharedPtr<Thread> thread);
explicit HLERequestContext(SharedPtr<ServerSession> session);
~HLERequestContext();
/// Returns a pointer to the IPC command buffer for this request.
@@ -119,6 +119,7 @@ public:
/**
* Puts the specified guest thread to sleep until the returned event is signaled or until the
* specified timeout expires.
* @param thread Thread to be put to sleep.
* @param reason Reason for pausing the thread, to be used for debugging purposes.
* @param timeout Timeout in nanoseconds after which the thread will be awoken and the callback
* invoked with a Timeout reason.
@@ -129,8 +130,8 @@ public:
* created.
* @returns Event that when signaled will resume the thread and call the callback function.
*/
SharedPtr<WritableEvent> SleepClientThread(const std::string& reason, u64 timeout,
WakeupCallback&& callback,
SharedPtr<WritableEvent> SleepClientThread(SharedPtr<Thread> thread, const std::string& reason,
u64 timeout, WakeupCallback&& callback,
SharedPtr<WritableEvent> writable_event = nullptr);
/// Populates this context with data from the requesting process/thread.
@@ -267,7 +268,6 @@ private:
std::array<u32, IPC::COMMAND_BUFFER_LENGTH> cmd_buf;
SharedPtr<Kernel::ServerSession> server_session;
SharedPtr<Thread> thread;
// TODO(yuriks): Check common usage of this and optimize size accordingly
boost::container::small_vector<SharedPtr<Object>, 8> move_objects;
boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;

View File

@@ -33,8 +33,8 @@ enum class HandleType : u32 {
};
enum class ResetType {
Automatic, ///< Reset automatically on object acquisition
Manual, ///< Never reset automatically
OneShot, ///< Reset automatically on object acquisition
Sticky, ///< Never reset automatically
};
class Object : NonCopyable {

View File

@@ -21,9 +21,8 @@ bool ReadableEvent::ShouldWait(const Thread* thread) const {
void ReadableEvent::Acquire(Thread* thread) {
ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
if (reset_type == ResetType::Automatic) {
if (reset_type == ResetType::OneShot)
signaled = false;
}
}
void ReadableEvent::Signal() {

View File

@@ -130,7 +130,7 @@ ResultCode ServerSession::HandleSyncRequest(SharedPtr<Thread> thread) {
// The ServerSession received a sync request, this means that there's new data available
// from its ClientSession, so wake up any threads that may be waiting on a svcReplyAndReceive or
// similar.
Kernel::HLERequestContext context(this, thread);
Kernel::HLERequestContext context(this);
u32* cmd_buf = (u32*)Memory::GetPointer(thread->GetTLSAddress());
context.PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf);

View File

@@ -1255,8 +1255,8 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
return vm_manager.MapCodeMemory(dst_address, src_address, size);
}
static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle,
u64 dst_address, u64 src_address, u64 size) {
ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address,
u64 src_address, u64 size) {
LOG_DEBUG(Kernel_SVC,
"called. process_handle=0x{:08X}, dst_address=0x{:016X}, src_address=0x{:016X}, "
"size=0x{:016X}",
@@ -1342,7 +1342,7 @@ static void ExitProcess(Core::System& system) {
/// Creates a new thread
static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr entry_point, u64 arg,
VAddr stack_top, u32 priority, s32 processor_id) {
LOG_DEBUG(Kernel_SVC,
LOG_TRACE(Kernel_SVC,
"called entrypoint=0x{:08X}, arg=0x{:08X}, stacktop=0x{:08X}, "
"threadpriority=0x{:08X}, processorid=0x{:08X} : created handle=0x{:08X}",
entry_point, arg, stack_top, priority, processor_id, *out_handle);
@@ -1402,7 +1402,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
/// Starts the thread for the provided handle
static ResultCode StartThread(Core::System& system, Handle thread_handle) {
LOG_DEBUG(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
@@ -1425,7 +1425,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {
/// Called when a thread exits
static void ExitThread(Core::System& system) {
LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->Stop();
@@ -1435,7 +1435,7 @@ static void ExitThread(Core::System& system) {
/// Sleep the current thread
static void SleepThread(Core::System& system, s64 nanoseconds) {
LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds);
LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
enum class SleepType : s64 {
YieldWithoutLoadBalancing = 0,
@@ -1880,51 +1880,11 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,
}
static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core,
u64 affinity_mask) {
LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, core=0x{:X}, affinity_mask=0x{:016X}",
thread_handle, core, affinity_mask);
u64 mask) {
LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:016X}, core=0x{:X}", thread_handle,
mask, core);
const auto* const current_process = system.Kernel().CurrentProcess();
if (core == static_cast<u32>(THREADPROCESSORID_IDEAL)) {
const u8 ideal_cpu_core = current_process->GetIdealCore();
ASSERT(ideal_cpu_core != static_cast<u8>(THREADPROCESSORID_IDEAL));
// Set the target CPU to the ideal core specified by the process.
core = ideal_cpu_core;
affinity_mask = 1ULL << core;
} else {
const u64 core_mask = current_process->GetCoreMask();
if ((core_mask | affinity_mask) != core_mask) {
LOG_ERROR(
Kernel_SVC,
"Invalid processor ID specified (core_mask=0x{:08X}, affinity_mask=0x{:016X})",
core_mask, affinity_mask);
return ERR_INVALID_PROCESSOR_ID;
}
if (affinity_mask == 0) {
LOG_ERROR(Kernel_SVC, "Specfified affinity mask is zero.");
return ERR_INVALID_COMBINATION;
}
if (core < Core::NUM_CPU_CORES) {
if ((affinity_mask & (1ULL << core)) == 0) {
LOG_ERROR(Kernel_SVC,
"Core is not enabled for the current mask, core={}, mask={:016X}", core,
affinity_mask);
return ERR_INVALID_COMBINATION;
}
} else if (core != static_cast<u32>(THREADPROCESSORID_DONT_CARE) &&
core != static_cast<u32>(THREADPROCESSORID_DONT_UPDATE)) {
LOG_ERROR(Kernel_SVC, "Invalid processor ID specified (core={}).", core);
return ERR_INVALID_PROCESSOR_ID;
}
}
const auto& handle_table = current_process->GetHandleTable();
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
if (!thread) {
LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -1932,7 +1892,40 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
return ERR_INVALID_HANDLE;
}
thread->ChangeCore(core, affinity_mask);
if (core == static_cast<u32>(THREADPROCESSORID_IDEAL)) {
const u8 ideal_cpu_core = thread->GetOwnerProcess()->GetIdealCore();
ASSERT(ideal_cpu_core != static_cast<u8>(THREADPROCESSORID_IDEAL));
// Set the target CPU to the ideal core specified by the process.
core = ideal_cpu_core;
mask = 1ULL << core;
}
if (mask == 0) {
LOG_ERROR(Kernel_SVC, "Mask is 0");
return ERR_INVALID_COMBINATION;
}
/// This value is used to only change the affinity mask without changing the current ideal core.
static constexpr u32 OnlyChangeMask = static_cast<u32>(-3);
if (core == OnlyChangeMask) {
core = thread->GetIdealCore();
} else if (core >= Core::NUM_CPU_CORES && core != static_cast<u32>(-1)) {
LOG_ERROR(Kernel_SVC, "Invalid core specified, got {}", core);
return ERR_INVALID_PROCESSOR_ID;
}
// Error out if the input core isn't enabled in the input mask.
if (core < Core::NUM_CPU_CORES && (mask & (1ull << core)) == 0) {
LOG_ERROR(Kernel_SVC, "Core is not enabled for the current mask, core={}, mask={:016X}",
core, mask);
return ERR_INVALID_COMBINATION;
}
thread->ChangeCore(core, mask);
return RESULT_SUCCESS;
}
@@ -1987,7 +1980,7 @@ static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle
auto& kernel = system.Kernel();
const auto [readable_event, writable_event] =
WritableEvent::CreateEventPair(kernel, ResetType::Manual, "CreateEvent");
WritableEvent::CreateEventPair(kernel, ResetType::Sticky, "CreateEvent");
HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable();
@@ -2190,8 +2183,8 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
return RESULT_SUCCESS;
}
static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids,
u32 out_thread_ids_size, Handle debug_handle) {
ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids,
u32 out_thread_ids_size, Handle debug_handle) {
// TODO: Handle this case when debug events are supported.
UNIMPLEMENTED_IF(debug_handle != InvalidHandle);

View File

@@ -30,21 +30,12 @@ enum ThreadPriority : u32 {
};
enum ThreadProcessorId : s32 {
/// Indicates that no particular processor core is preferred.
THREADPROCESSORID_DONT_CARE = -1,
/// Run thread on the ideal core specified by the process.
THREADPROCESSORID_IDEAL = -2,
/// Indicates that the preferred processor ID shouldn't be updated in
/// a core mask setting operation.
THREADPROCESSORID_DONT_UPDATE = -3,
THREADPROCESSORID_0 = 0, ///< Run thread on core 0
THREADPROCESSORID_1 = 1, ///< Run thread on core 1
THREADPROCESSORID_2 = 2, ///< Run thread on core 2
THREADPROCESSORID_3 = 3, ///< Run thread on core 3
THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this
THREADPROCESSORID_IDEAL = -2, ///< Run thread on the ideal core specified by the process.
THREADPROCESSORID_0 = 0, ///< Run thread on core 0
THREADPROCESSORID_1 = 1, ///< Run thread on core 1
THREADPROCESSORID_2 = 2, ///< Run thread on core 2
THREADPROCESSORID_3 = 3, ///< Run thread on core 3
THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this
/// Allowed CPU mask
THREADPROCESSORID_DEFAULT_MASK = (1 << THREADPROCESSORID_0) | (1 << THREADPROCESSORID_1) |

View File

@@ -276,7 +276,7 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"ISelfController:LaunchableEvent");
}
@@ -442,10 +442,10 @@ void ISelfController::GetIdleTimeDetectionExtension(Kernel::HLERequestContext& c
AppletMessageQueue::AppletMessageQueue() {
auto& kernel = Core::System::GetInstance().Kernel();
on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"AMMessageQueue:OnMessageRecieved");
on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "AMMessageQueue:OperationModeChanged");
kernel, Kernel::ResetType::OneShot, "AMMessageQueue:OperationModeChanged");
}
AppletMessageQueue::~AppletMessageQueue() = default;
@@ -835,7 +835,6 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
return;
}
std::memcpy(backing.buffer.data() + offset, data.data(), data.size());
@@ -858,7 +857,6 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
return;
}
ctx.WriteBuffer(backing.buffer.data() + offset, size);

View File

@@ -26,11 +26,11 @@ namespace Service::AM::Applets {
AppletDataBroker::AppletDataBroker() {
auto& kernel = Core::System::GetInstance().Kernel();
state_changed_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent");
kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:StateChangedEvent");
pop_out_data_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopDataOutEvent");
kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:PopDataOutEvent");
pop_interactive_out_data_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
}
AppletDataBroker::~AppletDataBroker() = default;

View File

@@ -9,6 +9,7 @@
#include "core/file_sys/content_archive.h"
#include "core/file_sys/control_metadata.h"
#include "core/file_sys/nca_metadata.h"
#include "core/file_sys/partition_filesystem.h"
#include "core/file_sys/patch_manager.h"
#include "core/file_sys/registered_cache.h"
#include "core/hle/ipc_helpers.h"
@@ -17,6 +18,7 @@
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/aoc/aoc_u.h"
#include "core/hle/service/filesystem/filesystem.h"
#include "core/loader/loader.h"
#include "core/settings.h"
@@ -66,22 +68,14 @@ AOC_U::AOC_U() : ServiceFramework("aoc:u"), add_on_content(AccumulateAOCTitleIDs
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
aoc_change_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
aoc_change_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"GetAddOnContentListChanged:Event");
}
AOC_U::~AOC_U() = default;
void AOC_U::CountAddOnContent(Kernel::HLERequestContext& ctx) {
struct Parameters {
u64 process_id;
};
static_assert(sizeof(Parameters) == 8);
IPC::RequestParser rp{ctx};
const auto params = rp.PopRaw<Parameters>();
LOG_DEBUG(Service_AOC, "called. process_id={}", params.process_id);
LOG_DEBUG(Service_AOC, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
@@ -100,33 +94,24 @@ void AOC_U::CountAddOnContent(Kernel::HLERequestContext& ctx) {
}
void AOC_U::ListAddOnContent(Kernel::HLERequestContext& ctx) {
struct Parameters {
u32 offset;
u32 count;
u64 process_id;
};
static_assert(sizeof(Parameters) == 16);
IPC::RequestParser rp{ctx};
const auto [offset, count, process_id] = rp.PopRaw<Parameters>();
LOG_DEBUG(Service_AOC, "called with offset={}, count={}, process_id={}", offset, count,
process_id);
const auto offset = rp.PopRaw<u32>();
auto count = rp.PopRaw<u32>();
LOG_DEBUG(Service_AOC, "called with offset={}, count={}", offset, count);
const auto current = Core::System::GetInstance().CurrentProcess()->GetTitleID();
std::vector<u32> out;
const auto& disabled = Settings::values.disabled_addons[current];
if (std::find(disabled.begin(), disabled.end(), "DLC") == disabled.end()) {
for (u64 content_id : add_on_content) {
if ((content_id & DLC_BASE_TITLE_ID_MASK) != current) {
continue;
}
out.push_back(static_cast<u32>(content_id & 0x7FF));
}
for (size_t i = 0; i < add_on_content.size(); ++i) {
if ((add_on_content[i] & DLC_BASE_TITLE_ID_MASK) == current)
out.push_back(static_cast<u32>(add_on_content[i] & 0x7FF));
}
const auto& disabled = Settings::values.disabled_addons[current];
if (std::find(disabled.begin(), disabled.end(), "DLC") != disabled.end())
out = {};
if (out.size() < offset) {
IPC::ResponseBuilder rb{ctx, 2};
// TODO(DarkLordZach): Find the correct error code.
@@ -134,31 +119,22 @@ void AOC_U::ListAddOnContent(Kernel::HLERequestContext& ctx) {
return;
}
const auto out_count = static_cast<u32>(std::min<size_t>(out.size() - offset, count));
count = static_cast<u32>(std::min<size_t>(out.size() - offset, count));
std::rotate(out.begin(), out.begin() + offset, out.end());
out.resize(out_count);
out.resize(count);
ctx.WriteBuffer(out);
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(out_count);
rb.Push(count);
}
void AOC_U::GetAddOnContentBaseId(Kernel::HLERequestContext& ctx) {
struct Parameters {
u64 process_id;
};
static_assert(sizeof(Parameters) == 8);
IPC::RequestParser rp{ctx};
const auto params = rp.PopRaw<Parameters>();
LOG_DEBUG(Service_AOC, "called. process_id={}", params.process_id);
LOG_DEBUG(Service_AOC, "called");
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
const auto title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID();
FileSys::PatchManager pm{title_id};
@@ -172,17 +148,10 @@ void AOC_U::GetAddOnContentBaseId(Kernel::HLERequestContext& ctx) {
}
void AOC_U::PrepareAddOnContent(Kernel::HLERequestContext& ctx) {
struct Parameters {
s32 addon_index;
u64 process_id;
};
static_assert(sizeof(Parameters) == 16);
IPC::RequestParser rp{ctx};
const auto [addon_index, process_id] = rp.PopRaw<Parameters>();
LOG_WARNING(Service_AOC, "(STUBBED) called with addon_index={}, process_id={}", addon_index,
process_id);
const auto aoc_id = rp.PopRaw<u32>();
LOG_WARNING(Service_AOC, "(STUBBED) called with aoc_id={:08X}", aoc_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);

View File

@@ -67,7 +67,7 @@ public:
// This is the event handle used to check if the audio buffer was released
auto& system = Core::System::GetInstance();
buffer_event = Kernel::WritableEvent::CreateEventPair(
system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased");
system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
audio_params.channel_count, std::move(unique_name),

View File

@@ -8,7 +8,6 @@
#include "audio_core/audio_renderer.h"
#include "common/alignment.h"
#include "common/bit_util.h"
#include "common/common_funcs.h"
#include "common/logging/log.h"
#include "common/string_util.h"
@@ -47,7 +46,7 @@ public:
auto& system = Core::System::GetInstance();
system_event = Kernel::WritableEvent::CreateEventPair(
system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent");
system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
system_event.writable);
}
@@ -179,7 +178,7 @@ public:
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"IAudioOutBufferReleasedEvent");
}
@@ -263,304 +262,64 @@ void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
OpenAudioRendererImpl(ctx);
}
static u64 CalculateNumPerformanceEntries(const AudioCore::AudioRendererParameter& params) {
// +1 represents the final mix.
return u64{params.effect_count} + params.submix_count + params.sink_count + params.voice_count +
1;
}
void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
LOG_DEBUG(Service_Audio, "called");
// Several calculations below align the sizes being calculated
// onto a 64 byte boundary.
static constexpr u64 buffer_alignment_size = 64;
u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
buffer_sz += params.submix_count * 1024;
buffer_sz += 0x940 * (params.submix_count + 1);
buffer_sz += 0x3F0 * params.voice_count;
buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
buffer_sz += Common::AlignUp(
(0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
(params.mix_buffer_count + 6),
0x40);
// Some calculations that calculate portions of the buffer
// that will contain information, on the other hand, align
// the result of some of their calcularions on a 16 byte boundary.
static constexpr u64 info_field_alignment_size = 16;
// Maximum detail entries that may exist at one time for performance
// frame statistics.
static constexpr u64 max_perf_detail_entries = 100;
// Size of the data structure representing the bulk of the voice-related state.
static constexpr u64 voice_state_size = 0x100;
// Size of the upsampler manager data structure
constexpr u64 upsampler_manager_size = 0x48;
// Calculates the part of the size that relates to mix buffers.
const auto calculate_mix_buffer_sizes = [](const AudioCore::AudioRendererParameter& params) {
// As of 8.0.0 this is the maximum on voice channels.
constexpr u64 max_voice_channels = 6;
// The service expects the sample_count member of the parameters to either be
// a value of 160 or 240, so the maximum sample count is assumed in order
// to adequately handle all values at runtime.
constexpr u64 default_max_sample_count = 240;
const u64 total_mix_buffers = params.mix_buffer_count + max_voice_channels;
u64 size = 0;
size += total_mix_buffers * (sizeof(s32) * params.sample_count);
size += total_mix_buffers * (sizeof(s32) * default_max_sample_count);
size += u64{params.submix_count} + params.sink_count;
size = Common::AlignUp(size, buffer_alignment_size);
size += Common::AlignUp(params.unknown_30, buffer_alignment_size);
size += Common::AlignUp(sizeof(s32) * params.mix_buffer_count, buffer_alignment_size);
return size;
};
// Calculates the portion of the size related to the mix data (and the sorting thereof).
const auto calculate_mix_info_size = [this](const AudioCore::AudioRendererParameter& params) {
// The size of the mixing info data structure.
constexpr u64 mix_info_size = 0x940;
// Consists of total submixes with the final mix included.
const u64 total_mix_count = u64{params.submix_count} + 1;
// The total number of effects that may be available to the audio renderer at any time.
constexpr u64 max_effects = 256;
// Calculates the part of the size related to the audio node state.
// This will only be used if the audio revision supports the splitter.
const auto calculate_node_state_size = [](std::size_t num_nodes) {
// Internally within a nodestate, it appears to use a data structure
// similar to a std::bitset<64> twice.
constexpr u64 bit_size = Common::BitSize<u64>();
constexpr u64 num_bitsets = 2;
// Node state instances have three states internally for performing
// depth-first searches of nodes. Initialized, Found, and Done Sorting.
constexpr u64 num_states = 3;
u64 size = 0;
size += (num_nodes * num_nodes) * sizeof(s32);
size += num_states * (num_nodes * sizeof(s32));
size += num_bitsets * (Common::AlignUp(num_nodes, bit_size) / Common::BitSize<u8>());
return size;
};
// Calculates the part of the size related to the adjacency (aka edge) matrix.
const auto calculate_edge_matrix_size = [](std::size_t num_nodes) {
return (num_nodes * num_nodes) * sizeof(s32);
};
u64 size = 0;
size += Common::AlignUp(sizeof(void*) * total_mix_count, info_field_alignment_size);
size += Common::AlignUp(mix_info_size * total_mix_count, info_field_alignment_size);
size += Common::AlignUp(sizeof(s32) * max_effects * params.submix_count,
info_field_alignment_size);
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
size += Common::AlignUp(calculate_node_state_size(total_mix_count) +
calculate_edge_matrix_size(total_mix_count),
info_field_alignment_size);
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
const u32 count = params.submix_count + 1;
u64 node_count = Common::AlignUp(count, 0x40);
const u64 node_state_buffer_sz =
4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
u64 edge_matrix_buffer_sz = 0;
node_count = Common::AlignUp(count * count, 0x40);
if (node_count >> 31 != 0) {
edge_matrix_buffer_sz = (node_count | 7) / 8;
} else {
edge_matrix_buffer_sz = node_count / 8;
}
buffer_sz += Common::AlignUp(node_state_buffer_sz + edge_matrix_buffer_sz, 0x10);
}
return size;
};
buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
buffer_sz += 0xE0 * params.num_splitter_send_channels;
buffer_sz += 0x20 * params.splitter_count;
buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
}
buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
((params.voice_count * 256) | 0x40);
// Calculates the part of the size related to voice channel info.
const auto calculate_voice_info_size = [](const AudioCore::AudioRendererParameter& params) {
constexpr u64 voice_info_size = 0x220;
constexpr u64 voice_resource_size = 0xD0;
u64 size = 0;
size += Common::AlignUp(sizeof(void*) * params.voice_count, info_field_alignment_size);
size += Common::AlignUp(voice_info_size * params.voice_count, info_field_alignment_size);
size +=
Common::AlignUp(voice_resource_size * params.voice_count, info_field_alignment_size);
size += Common::AlignUp(voice_state_size * params.voice_count, info_field_alignment_size);
return size;
};
// Calculates the part of the size related to memory pools.
const auto calculate_memory_pools_size = [](const AudioCore::AudioRendererParameter& params) {
const u64 num_memory_pools = sizeof(s32) * (u64{params.effect_count} + params.voice_count);
const u64 memory_pool_info_size = 0x20;
return Common::AlignUp(num_memory_pools * memory_pool_info_size, info_field_alignment_size);
};
// Calculates the part of the size related to the splitter context.
const auto calculate_splitter_context_size =
[this](const AudioCore::AudioRendererParameter& params) -> u64 {
if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
return 0;
}
constexpr u64 splitter_info_size = 0x20;
constexpr u64 splitter_destination_data_size = 0xE0;
u64 size = 0;
size += params.num_splitter_send_channels;
size +=
Common::AlignUp(splitter_info_size * params.splitter_count, info_field_alignment_size);
size += Common::AlignUp(splitter_destination_data_size * params.num_splitter_send_channels,
info_field_alignment_size);
return size;
};
// Calculates the part of the size related to the upsampler info.
const auto calculate_upsampler_info_size = [](const AudioCore::AudioRendererParameter& params) {
constexpr u64 upsampler_info_size = 0x280;
// Yes, using the buffer size over info alignment size is intentional here.
return Common::AlignUp(upsampler_info_size * (u64{params.submix_count} + params.sink_count),
buffer_alignment_size);
};
// Calculates the part of the size related to effect info.
const auto calculate_effect_info_size = [](const AudioCore::AudioRendererParameter& params) {
constexpr u64 effect_info_size = 0x2B0;
return Common::AlignUp(effect_info_size * params.effect_count, info_field_alignment_size);
};
// Calculates the part of the size related to audio sink info.
const auto calculate_sink_info_size = [](const AudioCore::AudioRendererParameter& params) {
const u64 sink_info_size = 0x170;
return Common::AlignUp(sink_info_size * params.sink_count, info_field_alignment_size);
};
// Calculates the part of the size related to voice state info.
const auto calculate_voice_state_size = [](const AudioCore::AudioRendererParameter& params) {
const u64 voice_state_size = 0x100;
const u64 additional_size = buffer_alignment_size - 1;
return Common::AlignUp(voice_state_size * params.voice_count + additional_size,
info_field_alignment_size);
};
// Calculates the part of the size related to performance statistics.
const auto calculate_perf_size = [this](const AudioCore::AudioRendererParameter& params) {
// Extra size value appended to the end of the calculation.
constexpr u64 appended = 128;
// Whether or not we assume the newer version of performance metrics data structures.
const bool is_v2 =
IsFeatureSupported(AudioFeatures::PerformanceMetricsVersion2, params.revision);
// Data structure sizes
constexpr u64 perf_statistics_size = 0x0C;
const u64 header_size = is_v2 ? 0x30 : 0x18;
const u64 entry_size = is_v2 ? 0x18 : 0x10;
const u64 detail_size = is_v2 ? 0x18 : 0x10;
const u64 entry_count = CalculateNumPerformanceEntries(params);
const u64 size_per_frame =
header_size + (entry_size * entry_count) + (detail_size * max_perf_detail_entries);
u64 size = 0;
size += Common::AlignUp(size_per_frame * params.performance_frame_count + 1,
buffer_alignment_size);
size += Common::AlignUp(perf_statistics_size, buffer_alignment_size);
size += appended;
return size;
};
// Calculates the part of the size that relates to the audio command buffer.
const auto calculate_command_buffer_size =
[this](const AudioCore::AudioRendererParameter& params) {
constexpr u64 alignment = (buffer_alignment_size - 1) * 2;
if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) {
constexpr u64 command_buffer_size = 0x18000;
return command_buffer_size + alignment;
}
// When the variadic command buffer is supported, this means
// the command generator for the audio renderer can issue commands
// that are (as one would expect), variable in size. So what we need to do
// is determine the maximum possible size for a few command data structures
// then multiply them by the amount of present commands indicated by the given
// respective audio parameters.
constexpr u64 max_biquad_filters = 2;
constexpr u64 max_mix_buffers = 24;
constexpr u64 biquad_filter_command_size = 0x2C;
constexpr u64 depop_mix_command_size = 0x24;
constexpr u64 depop_setup_command_size = 0x50;
constexpr u64 effect_command_max_size = 0x540;
constexpr u64 mix_command_size = 0x1C;
constexpr u64 mix_ramp_command_size = 0x24;
constexpr u64 mix_ramp_grouped_command_size = 0x13C;
constexpr u64 perf_command_size = 0x28;
constexpr u64 sink_command_size = 0x130;
constexpr u64 submix_command_max_size =
depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers;
constexpr u64 volume_command_size = 0x1C;
constexpr u64 volume_ramp_command_size = 0x20;
constexpr u64 voice_biquad_filter_command_size =
biquad_filter_command_size * max_biquad_filters;
constexpr u64 voice_data_command_size = 0x9C;
const u64 voice_command_max_size =
(params.splitter_count * depop_setup_command_size) +
(voice_data_command_size + voice_biquad_filter_command_size +
volume_ramp_command_size + mix_ramp_grouped_command_size);
// Now calculate the individual elements that comprise the size and add them together.
const u64 effect_commands_size = params.effect_count * effect_command_max_size;
const u64 final_mix_commands_size =
depop_mix_command_size + volume_command_size * max_mix_buffers;
const u64 perf_commands_size =
perf_command_size *
(CalculateNumPerformanceEntries(params) + max_perf_detail_entries);
const u64 sink_commands_size = params.sink_count * sink_command_size;
const u64 splitter_commands_size =
params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size;
const u64 submix_commands_size = params.submix_count * submix_command_max_size;
const u64 voice_commands_size = params.voice_count * voice_command_max_size;
return effect_commands_size + final_mix_commands_size + perf_commands_size +
sink_commands_size + splitter_commands_size + submix_commands_size +
voice_commands_size + alignment;
};
IPC::RequestParser rp{ctx};
const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
u64 size = 0;
size += calculate_mix_buffer_sizes(params);
size += calculate_mix_info_size(params);
size += calculate_voice_info_size(params);
size += upsampler_manager_size;
size += calculate_memory_pools_size(params);
size += calculate_splitter_context_size(params);
size = Common::AlignUp(size, buffer_alignment_size);
size += calculate_upsampler_info_size(params);
size += calculate_effect_info_size(params);
size += calculate_sink_info_size(params);
size += calculate_voice_state_size(params);
size += calculate_perf_size(params);
size += calculate_command_buffer_size(params);
// finally, 4KB page align the size, and we're done.
size = Common::AlignUp(size, 4096);
if (params.performance_frame_count >= 1) {
output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
16 * params.voice_count + 16) +
0x658) *
(params.performance_frame_count + 1) +
0xc0,
0x40) +
output_sz;
}
output_sz = Common::AlignUp(output_sz + 0x1807e, 0x1000);
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push<u64>(size);
LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", size);
rb.Push(RESULT_SUCCESS);
rb.Push<u64>(output_sz);
LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz);
}
void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
@@ -598,15 +357,10 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
}
bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
// Byte swap
const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0');
u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
switch (feature) {
case AudioFeatures::Splitter:
return version_num >= 2U;
case AudioFeatures::PerformanceMetricsVersion2:
case AudioFeatures::VariadicCommandBuffer:
return version_num >= 5U;
return version_num >= 2u;
default:
return false;
}

View File

@@ -28,8 +28,6 @@ private:
enum class AudioFeatures : u32 {
Splitter,
PerformanceMetricsVersion2,
VariadicCommandBuffer,
};
bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const;

View File

@@ -34,8 +34,8 @@ public:
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
register_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "BT:RegisterEvent");
register_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"BT:RegisterEvent");
}
private:

View File

@@ -57,13 +57,13 @@ public:
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
scan_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
scan_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"IBtmUserCore:ScanEvent");
connection_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IBtmUserCore:ConnectionEvent");
kernel, Kernel::ResetType::OneShot, "IBtmUserCore:ConnectionEvent");
service_discovery = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IBtmUserCore:Discovery");
config_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
kernel, Kernel::ResetType::OneShot, "IBtmUserCore:Discovery");
config_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"IBtmUserCore:ConfigEvent");
}

View File

@@ -170,7 +170,7 @@ void Controller_NPad::InitNewlyAddedControler(std::size_t controller_idx) {
void Controller_NPad::OnInit() {
auto& kernel = Core::System::GetInstance().Kernel();
styleset_changed_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "npad:NpadStyleSetChanged");
kernel, Kernel::ResetType::OneShot, "npad:NpadStyleSetChanged");
if (!IsControllerActivated()) {
return;

View File

@@ -26,7 +26,7 @@ constexpr ResultCode ERR_NO_APPLICATION_AREA(ErrorModule::NFP, 152);
Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
: ServiceFramework(name), module(std::move(module)) {
auto& kernel = Core::System::GetInstance().Kernel();
nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"IUser:NFCTagDetected");
}
@@ -67,9 +67,9 @@ public:
auto& kernel = Core::System::GetInstance().Kernel();
deactivate_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IUser:DeactivateEvent");
kernel, Kernel::ResetType::OneShot, "IUser:DeactivateEvent");
availability_change_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IUser:AvailabilityChangeEvent");
kernel, Kernel::ResetType::OneShot, "IUser:AvailabilityChangeEvent");
}
private:

View File

@@ -62,9 +62,9 @@ public:
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
event1 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
event1 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"IRequest:Event1");
event2 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
event2 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"IRequest:Event2");
}

View File

@@ -141,7 +141,7 @@ public:
auto& kernel = Core::System::GetInstance().Kernel();
finished_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic,
kernel, Kernel::ResetType::OneShot,
"IEnsureNetworkClockAvailabilityService:FinishEvent");
}

View File

@@ -129,7 +129,7 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
"NVDRV::query_event");
}

View File

@@ -16,7 +16,7 @@ namespace Service::NVFlinger {
BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) {
auto& kernel = Core::System::GetInstance().Kernel();
buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"BufferQueue NativeHandle");
}

View File

@@ -2,15 +2,16 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <chrono>
#include "common/logging/log.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
#include "core/hle/service/set/set.h"
#include "core/settings.h"
namespace Service::Set {
namespace {
constexpr std::array<LanguageCode, 17> available_language_codes = {{
LanguageCode::JA,
LanguageCode::EN_US,
@@ -31,35 +32,41 @@ constexpr std::array<LanguageCode, 17> available_language_codes = {{
LanguageCode::ZH_HANT,
}};
constexpr std::size_t pre4_0_0_max_entries = 15;
constexpr std::size_t post4_0_0_max_entries = 17;
constexpr std::size_t pre4_0_0_max_entries = 0xF;
constexpr std::size_t post4_0_0_max_entries = 0x40;
constexpr ResultCode ERR_INVALID_LANGUAGE{ErrorModule::Settings, 625};
void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t num_language_codes) {
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(static_cast<u32>(num_language_codes));
}
void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t max_size) {
const std::size_t requested_amount = ctx.GetWriteBufferSize() / sizeof(LanguageCode);
const std::size_t copy_amount = std::min(requested_amount, max_size);
const std::size_t copy_size = copy_amount * sizeof(LanguageCode);
ctx.WriteBuffer(available_language_codes.data(), copy_size);
PushResponseLanguageCode(ctx, copy_amount);
}
} // Anonymous namespace
LanguageCode GetLanguageCodeFromIndex(std::size_t index) {
return available_language_codes.at(index);
}
template <std::size_t size>
static std::array<LanguageCode, size> MakeLanguageCodeSubset() {
std::array<LanguageCode, size> arr;
std::copy_n(available_language_codes.begin(), size, arr.begin());
return arr;
}
static void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t max_size) {
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
if (available_language_codes.size() > max_size) {
rb.Push(static_cast<u32>(max_size));
} else {
rb.Push(static_cast<u32>(available_language_codes.size()));
}
}
void SET::GetAvailableLanguageCodes(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_SET, "called");
GetAvailableLanguageCodesImpl(ctx, pre4_0_0_max_entries);
if (available_language_codes.size() > pre4_0_0_max_entries) {
ctx.WriteBuffer(MakeLanguageCodeSubset<pre4_0_0_max_entries>());
} else {
ctx.WriteBuffer(available_language_codes);
}
PushResponseLanguageCode(ctx, pre4_0_0_max_entries);
}
void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
@@ -80,7 +87,12 @@ void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
void SET::GetAvailableLanguageCodes2(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_SET, "called");
GetAvailableLanguageCodesImpl(ctx, post4_0_0_max_entries);
if (available_language_codes.size() > post4_0_0_max_entries) {
ctx.WriteBuffer(MakeLanguageCodeSubset<post4_0_0_max_entries>());
} else {
ctx.WriteBuffer(available_language_codes);
}
PushResponseLanguageCode(ctx, post4_0_0_max_entries);
}
void SET::GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx) {
@@ -90,9 +102,9 @@ void SET::GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx) {
}
void SET::GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_SET, "called");
PushResponseLanguageCode(ctx, post4_0_0_max_entries);
LOG_DEBUG(Service_SET, "called");
}
void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) {

View File

@@ -17,7 +17,7 @@ namespace Service::VI {
Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
auto& kernel = Core::System::GetInstance().Kernel();
vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
fmt::format("Display VSync Event {}", id));
}

View File

@@ -556,7 +556,7 @@ private:
} else {
// Wait the current thread until a buffer becomes available
ctx.SleepClientThread(
"IHOSBinderDriver::DequeueBuffer", -1,
Kernel::GetCurrentThread(), "IHOSBinderDriver::DequeueBuffer", -1,
[=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
Kernel::ThreadWakeupReason reason) {
// Repeat TransactParcel DequeueBuffer when a buffer is available

View File

@@ -39,7 +39,7 @@ std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
const std::vector<u8> uncompressed_data =
Common::Compression::DecompressDataLZ4(compressed_data, header.size);
ASSERT_MSG(uncompressed_data.size() == header.size, "{} != {}", header.size,
ASSERT_MSG(uncompressed_data.size() == static_cast<int>(header.size), "{} != {}", header.size,
uncompressed_data.size());
return uncompressed_data;

View File

@@ -90,6 +90,7 @@ void LogSettings() {
LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
LogSetting("Renderer_UseCompatibilityProfile", Settings::values.use_compatibility_profile);
LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
LogSetting("Renderer_UseAsynchronousGpuEmulation",

View File

@@ -390,6 +390,7 @@ struct Values {
float resolution_factor;
bool use_frame_limit;
u16 frame_limit;
bool use_compatibility_profile;
bool use_disk_shader_cache;
bool use_accurate_gpu_emulation;
bool use_asynchronous_gpu_emulation;

View File

@@ -42,6 +42,8 @@ add_library(video_core STATIC
renderer_opengl/gl_device.h
renderer_opengl/gl_global_cache.cpp
renderer_opengl/gl_global_cache.h
renderer_opengl/gl_primitive_assembler.cpp
renderer_opengl/gl_primitive_assembler.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_rasterizer_cache.cpp
@@ -87,6 +89,7 @@ add_library(video_core STATIC
shader/decode/conversion.cpp
shader/decode/memory.cpp
shader/decode/texture.cpp
shader/decode/image.cpp
shader/decode/float_set_predicate.cpp
shader/decode/integer_set_predicate.cpp
shader/decode/half_set_predicate.cpp

View File

@@ -40,13 +40,6 @@ bool DmaPusher::Step() {
}
const CommandList& command_list{dma_pushbuffer.front()};
ASSERT_OR_EXECUTE(!command_list.empty(), {
// Somehow the command_list is empty, in order to avoid a crash
// We ignore it and assume its size is 0.
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
return true;
});
const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
GPUVAddr dma_get = command_list_header.addr;
GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);

View File

@@ -2,8 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include "common/assert.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/memory_manager.h"
@@ -12,9 +10,7 @@
namespace Tegra::Engines::Upload {
State::State(MemoryManager& memory_manager, Registers& regs)
: regs{regs}, memory_manager{memory_manager} {}
State::~State() = default;
: memory_manager(memory_manager), regs(regs) {}
void State::ProcessExec(const bool is_linear) {
write_offset = 0;

View File

@@ -4,8 +4,10 @@
#pragma once
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra {
@@ -55,10 +57,10 @@ struct Registers {
class State {
public:
State(MemoryManager& memory_manager, Registers& regs);
~State();
~State() = default;
void ProcessExec(bool is_linear);
void ProcessData(u32 data, bool is_last_call);
void ProcessExec(const bool is_linear);
void ProcessData(const u32 data, const bool is_last_call);
private:
u32 write_offset = 0;

View File

@@ -34,9 +34,9 @@ void Maxwell3D::InitializeRegisterDefaults() {
// Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is
// needed for ARMS.
for (auto& viewport : regs.viewports) {
viewport.depth_range_near = 0.0f;
viewport.depth_range_far = 1.0f;
for (std::size_t viewport{}; viewport < Regs::NumViewports; ++viewport) {
regs.viewports[viewport].depth_range_near = 0.0f;
regs.viewports[viewport].depth_range_far = 1.0f;
}
// Doom and Bomberman seems to use the uninitialized registers and just enable blend
@@ -47,13 +47,13 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.blend.equation_a = Regs::Blend::Equation::Add;
regs.blend.factor_source_a = Regs::Blend::Factor::One;
regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
for (auto& blend : regs.independent_blend) {
blend.equation_rgb = Regs::Blend::Equation::Add;
blend.factor_source_rgb = Regs::Blend::Factor::One;
blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
blend.equation_a = Regs::Blend::Equation::Add;
blend.factor_source_a = Regs::Blend::Factor::One;
blend.factor_dest_a = Regs::Blend::Factor::Zero;
for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) {
regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add;
regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One;
regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero;
regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add;
regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One;
regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero;
}
regs.stencil_front_op_fail = Regs::StencilOp::Keep;
regs.stencil_front_op_zfail = Regs::StencilOp::Keep;
@@ -75,11 +75,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
// TODO(bunnei): Some games do not initialize the color masks (e.g. Sonic Mania). Assuming a
// default of enabled fixes rendering here.
for (auto& color_mask : regs.color_mask) {
color_mask.R.Assign(1);
color_mask.G.Assign(1);
color_mask.B.Assign(1);
color_mask.A.Assign(1);
for (std::size_t color_mask = 0; color_mask < Regs::NumRenderTargets; color_mask++) {
regs.color_mask[color_mask].R.Assign(1);
regs.color_mask[color_mask].G.Assign(1);
regs.color_mask[color_mask].B.Assign(1);
regs.color_mask[color_mask].A.Assign(1);
}
// Commercial games seem to assume this value is enabled and nouveau sets this value manually.
@@ -178,13 +178,13 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
// Vertex buffer
if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
} else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
} else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
}
}
@@ -432,17 +432,13 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
"TIC versions other than BlockLinear or Pitch are unimplemented");
const auto r_type = tic_entry.r_type.Value();
const auto g_type = tic_entry.g_type.Value();
const auto b_type = tic_entry.b_type.Value();
const auto a_type = tic_entry.a_type.Value();
const auto r_type{tic_entry.r_type.Value()};
const auto g_type{tic_entry.g_type.Value()};
const auto b_type{tic_entry.b_type.Value()};
const auto a_type{tic_entry.a_type.Value()};
// TODO(Subv): Different data types for separate components are not supported
DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
return tic_entry;
}

View File

@@ -6,7 +6,6 @@
#include <array>
#include <bitset>
#include <type_traits>
#include <unordered_map>
#include <vector>
@@ -59,7 +58,6 @@ public:
static constexpr std::size_t NumCBData = 16;
static constexpr std::size_t NumVertexArrays = 32;
static constexpr std::size_t NumVertexAttributes = 32;
static constexpr std::size_t NumVaryings = 31;
static constexpr std::size_t NumTextureSamplers = 32;
static constexpr std::size_t NumClipDistances = 8;
static constexpr std::size_t MaxShaderProgram = 6;
@@ -1109,7 +1107,6 @@ public:
} regs{};
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
struct State {
struct ConstBufferInfo {

View File

@@ -98,10 +98,6 @@ union Attribute {
BitField<22, 2, u64> element;
BitField<24, 6, Index> index;
BitField<47, 3, AttributeSize> size;
bool IsPhysical() const {
return element == 0 && static_cast<u64>(index.Value()) == 0;
}
} fmt20;
union {
@@ -126,6 +122,15 @@ union Sampler {
u64 value{};
};
union Image {
Image() = default;
constexpr explicit Image(u64 value) : value{value} {}
BitField<36, 13, u64> index;
u64 value;
};
} // namespace Tegra::Shader
namespace std {
@@ -344,6 +349,26 @@ enum class TextureMiscMode : u64 {
PTP,
};
enum class SurfaceDataMode : u64 {
P = 0,
D_BA = 1,
};
enum class OutOfBoundsStore : u64 {
Ignore = 0,
Clamp = 1,
Trap = 2,
};
enum class ImageType : u64 {
Texture1D = 0,
TextureBuffer = 1,
Texture1DArray = 2,
Texture2D = 3,
Texture2DArray = 4,
Texture3D = 5,
};
enum class IsberdMode : u64 {
None = 0,
Patch = 1,
@@ -398,7 +423,7 @@ enum class LmemLoadCacheManagement : u64 {
CV = 3,
};
enum class LmemStoreCacheManagement : u64 {
enum class StoreCacheManagement : u64 {
Default = 0,
CG = 1,
CS = 2,
@@ -503,11 +528,6 @@ enum class SystemVariable : u64 {
CircularQueueEntryAddressHigh = 0x63,
};
enum class PhysicalAttributeDirection : u64 {
Input = 0,
Output = 1,
};
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -529,11 +549,6 @@ union Instruction {
BitField<39, 8, Register> gpr39;
BitField<48, 16, u64> opcode;
union {
BitField<8, 8, Register> gpr;
BitField<20, 24, s64> offset;
} gmem;
union {
BitField<20, 16, u64> imm20_16;
BitField<20, 19, u64> imm20_19;
@@ -601,7 +616,6 @@ union Instruction {
} alu;
union {
BitField<38, 1, u64> idx;
BitField<51, 1, u64> saturate;
BitField<52, 2, IpaSampleMode> sample_mode;
BitField<54, 2, IpaInterpMode> interp_mode;
@@ -811,30 +825,21 @@ union Instruction {
} ld_l;
union {
BitField<44, 2, LmemStoreCacheManagement> cache_management;
BitField<44, 2, StoreCacheManagement> cache_management;
} st_l;
union {
BitField<48, 3, UniformType> type;
BitField<46, 2, u64> cache_mode;
BitField<20, 24, s64> immediate_offset;
} ldg;
union {
BitField<48, 3, UniformType> type;
BitField<46, 2, u64> cache_mode;
BitField<20, 24, s64> immediate_offset;
} stg;
union {
BitField<32, 1, PhysicalAttributeDirection> direction;
BitField<47, 3, AttributeSize> size;
BitField<20, 11, u64> address;
} al2p;
union {
BitField<53, 3, UniformType> type;
BitField<52, 1, u64> extended;
} generic;
union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
@@ -1231,6 +1236,20 @@ union Instruction {
}
} texs;
union {
BitField<28, 1, u64> is_array;
BitField<29, 2, TextureType> texture_type;
BitField<35, 1, u64> aoffi;
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> ms; // Multisample?
BitField<54, 1, u64> cl;
BitField<55, 1, u64> process_mode;
TextureProcessMode GetTextureProcessMode() const {
return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
}
} tld;
union {
BitField<49, 1, u64> nodep_flag;
BitField<53, 4, u64> texture_info;
@@ -1280,6 +1299,35 @@ union Instruction {
}
} tlds;
union {
BitField<24, 2, StoreCacheManagement> cache_management;
BitField<33, 3, ImageType> image_type;
BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
BitField<51, 1, u64> is_immediate;
BitField<52, 1, SurfaceDataMode> mode;
BitField<20, 3, StoreType> store_data_layout;
BitField<20, 4, u64> component_mask_selector;
bool IsComponentEnabled(std::size_t component) const {
ASSERT(mode == SurfaceDataMode::P);
constexpr u8 R = 0b0001;
constexpr u8 G = 0b0010;
constexpr u8 B = 0b0100;
constexpr u8 A = 0b1000;
constexpr std::array<u8, 16> mask = {
0, (R), (G), (R | G), (B), (R | B),
(G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A),
(B | A), (R | B | A), (G | B | A), (R | G | B | A)};
return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
}
StoreType GetStoreDataLayout() const {
ASSERT(mode == SurfaceDataMode::D_BA);
return store_data_layout;
}
} sust;
union {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;
@@ -1371,6 +1419,7 @@ union Instruction {
Attribute attribute;
Sampler sampler;
Image image;
u64 value;
};
@@ -1395,24 +1444,23 @@ public:
LD_L,
LD_S,
LD_C,
LD, // Load from generic memory
LDG, // Load from global memory
ST_A,
ST_L,
ST_S,
ST, // Store in generic memory
STG, // Store in global memory
AL2P, // Transforms attribute memory into physical memory
LDG, // Load from global memory
STG, // Store in global memory
TEX,
TEX_B, // Texture Load Bindless
TXQ, // Texture Query
TXQ_B, // Texture Query Bindless
TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
TLD, // Texture Load
TLDS, // Texture Load with scalar/non-vec4 source/destinations
TLD4, // Texture Load 4
TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
TMML_B, // Texture Mip Map Level
TMML, // Texture Mip Map Level
SUST, // Surface Store
EXIT,
IPA,
OUT_R, // Emit vertex/primitive
@@ -1543,6 +1591,7 @@ public:
Synch,
Memory,
Texture,
Image,
FloatSet,
FloatSetPredicate,
IntegerSet,
@@ -1668,24 +1717,23 @@ private:
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
INST("100-------------", Id::LD, Type::Memory, "LD"),
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("101-------------", Id::ST, Type::Memory, "ST"),
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),

View File

@@ -118,7 +118,7 @@ void SynchState::WaitForSynchronization(u64 fence) {
// Wait for the GPU to be idle (all commands to be executed)
{
MICROPROFILE_SCOPE(GPU_wait);
std::unique_lock lock{synchronization_mutex};
std::unique_lock<std::mutex> lock{synchronization_mutex};
synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
}
}

View File

@@ -81,6 +81,12 @@ struct CommandDataContainer {
CommandDataContainer(CommandData&& data, u64 next_fence)
: data{std::move(data)}, fence{next_fence} {}
CommandDataContainer& operator=(const CommandDataContainer& t) {
data = std::move(t.data);
fence = t.fence;
return *this;
}
CommandData data;
u64 fence{};
};
@@ -103,7 +109,7 @@ struct SynchState final {
void TrySynchronize() {
if (IsSynchronized()) {
std::lock_guard lock{synchronization_mutex};
std::lock_guard<std::mutex> lock{synchronization_mutex};
synchronization_condition.notify_one();
}
}

View File

@@ -118,12 +118,10 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
static_cast<u32>(opcode.operation.Value()));
}
// An instruction with the Exit flag will not actually
// cause an exit if it's executed inside a delay slot.
// TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further
// testing on the MME code.
if (opcode.is_exit) {
// Exit has a delay slot, execute the next instruction
// Note: Executing an exit during a branch delay slot will cause the instruction at the
// branch target to be executed before exiting.
Step(offset, true);
return false;
}

View File

@@ -144,9 +144,8 @@ protected:
object->SetIsRegistered(false);
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
const CacheAddr addr = object->GetCacheAddr();
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
map_cache.erase(addr);
map_cache.erase(object->GetCacheAddr());
}
/// Returns a ticks counter used for tracking when cached objects were last modified

View File

@@ -71,6 +71,16 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s
return uploaded_offset;
}
std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) {
AlignBuffer(alignment);
u8* const uploaded_ptr = buffer_ptr;
const GLintptr uploaded_offset = buffer_offset;
buffer_ptr += size;
buffer_offset += size;
return std::make_tuple(uploaded_ptr, uploaded_offset);
}
bool OGLBufferCache::Map(std::size_t max_size) {
bool invalidate;
std::tie(buffer_ptr, buffer_offset_base, invalidate) =

View File

@@ -61,6 +61,9 @@ public:
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
/// Reserves memory to be used by host's CPU. Returns mapped address and offset.
std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
bool Map(std::size_t max_size);
void Unmap();

View File

@@ -21,21 +21,16 @@ T GetInteger(GLenum pname) {
Device::Device() {
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
has_variable_aoffi = TestVariableAoffi();
}
Device::Device(std::nullptr_t) {
uniform_buffer_alignment = 0;
max_vertex_attributes = 16;
max_varyings = 15;
has_variable_aoffi = true;
}
bool Device::TestVariableAoffi() {
const GLchar* AOFFI_TEST = R"(#version 430 core
// This is a unit test, please ignore me on apitrace bug reports.
uniform sampler2D tex;
uniform ivec2 variable_offset;
void main() {

View File

@@ -5,7 +5,6 @@
#pragma once
#include <cstddef>
#include "common/common_types.h"
namespace OpenGL {
@@ -18,14 +17,6 @@ public:
return uniform_buffer_alignment;
}
u32 GetMaxVertexAttributes() const {
return max_vertex_attributes;
}
u32 GetMaxVaryings() const {
return max_varyings;
}
bool HasVariableAoffi() const {
return has_variable_aoffi;
}
@@ -34,8 +25,6 @@ private:
static bool TestVariableAoffi();
std::size_t uniform_buffer_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
bool has_variable_aoffi{};
};

View File

@@ -0,0 +1,63 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
namespace OpenGL {
constexpr u32 TRIANGLES_PER_QUAD = 6;
constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3};
PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {}
PrimitiveAssembler::~PrimitiveAssembler() = default;
std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const {
ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4");
return (count / 4) * TRIANGLES_PER_QUAD * sizeof(GLuint);
}
GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
const std::size_t size{CalculateQuadSize(count)};
auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(size);
for (u32 primitive = 0; primitive < count / 4; ++primitive) {
for (u32 i = 0; i < TRIANGLES_PER_QUAD; ++i) {
const u32 index = first + primitive * 4 + QUAD_MAP[i];
std::memcpy(dst_pointer, &index, sizeof(index));
dst_pointer += sizeof(index);
}
}
return index_offset;
}
GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) {
const std::size_t map_size{CalculateQuadSize(count)};
auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
const u8* source{memory_manager.GetPointer(gpu_addr)};
for (u32 primitive = 0; primitive < count / 4; ++primitive) {
for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
const u32 index = primitive * 4 + QUAD_MAP[i];
const u8* src_offset = source + (index * index_size);
std::memcpy(dst_pointer, src_offset, index_size);
dst_pointer += index_size;
}
}
return index_offset;
}
} // namespace OpenGL

View File

@@ -0,0 +1,31 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <glad/glad.h>
#include "common/common_types.h"
namespace OpenGL {
class OGLBufferCache;
class PrimitiveAssembler {
public:
explicit PrimitiveAssembler(OGLBufferCache& buffer_cache);
~PrimitiveAssembler();
/// Calculates the size required by MakeQuadArray and MakeQuadIndexed.
std::size_t CalculateQuadSize(u32 count) const;
GLintptr MakeQuadArray(u32 first, u32 count);
GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count);
private:
OGLBufferCache& buffer_cache;
};
} // namespace OpenGL

View File

@@ -29,8 +29,10 @@
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = VideoCore::Surface::PixelFormat;
using SurfaceType = VideoCore::Surface::SurfaceType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
@@ -98,11 +100,9 @@ struct FramebufferCacheKey {
}
};
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info)
: res_cache{*this}, shader_cache{*this, system, emu_window, device},
global_cache{*this}, system{system}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE) {
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
: res_cache{*this}, shader_cache{*this, system, device}, global_cache{*this}, system{system},
screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
OpenGLState::ApplyDefaultState();
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
@@ -121,11 +121,6 @@ void RasterizerOpenGL::CheckExtensions() {
Render_OpenGL,
"Anisotropic filter is not supported! This can cause graphical issues in some games.");
}
if (!GLAD_GL_ARB_buffer_storage) {
LOG_WARNING(
Render_OpenGL,
"Buffer storage control is not supported! This can cause performance degradation.");
}
}
GLuint RasterizerOpenGL::SetupVertexFormat() {
@@ -246,6 +241,29 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
DrawParameters params{};
params.current_instance = gpu.state.current_instance;
if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly);
params.use_indexed = true;
params.primitive_mode = GL_TRIANGLES;
if (is_indexed) {
params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
params.count = (regs.index_array.count / 4) * 6;
params.index_buffer_offset = primitive_assembler.MakeQuadIndexed(
regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(),
regs.index_array.count);
params.base_vertex = static_cast<GLint>(regs.vb_element_base);
} else {
// MakeQuadArray always generates u32 indexes
params.index_format = GL_UNSIGNED_INT;
params.count = (regs.vertex_buffer.count / 4) * 6;
params.index_buffer_offset =
primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count);
}
return params;
}
params.use_indexed = is_indexed;
params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
@@ -302,8 +320,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
static_cast<GLsizeiptr>(sizeof(ubo)));
Shader shader{shader_cache.GetStageProgram(program)};
const auto [program_handle, next_bindings] =
shader->GetProgramHandle(primitive_mode, base_bindings);
const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)};
SetupConstBuffers(stage_enum, shader, base_bindings);
SetupGlobalRegions(stage_enum, shader, base_bindings);
const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
switch (program) {
case Maxwell::ShaderProgram::VertexA:
@@ -321,11 +345,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
shader_config.enable.Value(), shader_config.offset);
}
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
SetupTextures(stage_enum, shader, program_handle, base_bindings);
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -663,19 +682,30 @@ void RasterizerOpenGL::DrawArrays() {
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest(state);
// Alpha Testing is synced on shaders.
SyncTransformFeedback();
SyncPointState();
CheckAlphaTests();
SyncPolygonOffset();
SyncAlphaTest();
// TODO(bunnei): Sync framebuffer_scale uniform here
// TODO(bunnei): Sync scissorbox uniform(s) here
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
std::size_t buffer_size = CalculateVertexArraysSize();
// Add space for index buffer
if (is_indexed) {
buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
// Add space for index buffer (keeping in mind non-core primitives)
switch (regs.draw.topology) {
case Maxwell::PrimitiveTopology::Quads:
buffer_size = Common::AlignUp(buffer_size, 4) +
primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count);
break;
default:
if (is_indexed) {
buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
}
break;
}
// Uniform space for the 5 shader stages
@@ -777,8 +807,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
}
void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLuint program_handle,
BaseBindings base_bindings) {
const Shader& shader, BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
@@ -825,8 +854,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
}
void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLenum primitive_mode,
BaseBindings base_bindings) {
const Shader& shader, BaseBindings base_bindings) {
const auto& entries = shader->GetShaderEntries().global_memory_entries;
for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry{entries[bindpoint]};
@@ -839,8 +867,8 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
}
}
void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings) {
TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
@@ -849,6 +877,8 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
"Exceeded the number of active textures.");
TextureBufferUsage texture_buffer_usage{0};
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
Tegra::Texture::FullTextureInfo texture;
@@ -862,18 +892,25 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
}
const u32 current_bindpoint = base_bindings.sampler + bindpoint;
state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc);
auto& unit{state.texture_units[current_bindpoint]};
unit.sampler = sampler_cache.GetSampler(texture.tsc);
if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
state.texture_units[current_bindpoint].texture =
surface->Texture(entry.IsArray()).handle;
if (surface->GetSurfaceParams().target == SurfaceTarget::TextureBuffer) {
// Record that this texture is a texture buffer.
texture_buffer_usage.set(bindpoint);
}
unit.texture = surface->Texture(entry.IsArray()).handle;
surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
texture.tic.w_source);
} else {
// Can occur when texture addr is null or its memory is unmapped/invalid
state.texture_units[current_bindpoint].texture = 0;
unit.texture = 0;
}
}
return texture_buffer_usage;
}
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
@@ -1103,9 +1140,7 @@ void RasterizerOpenGL::SyncTransformFeedback() {
void RasterizerOpenGL::SyncPointState() {
const auto& regs = system.GPU().Maxwell3D().regs;
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL).
state.point.size = std::max(1.0f, regs.point_size);
state.point.size = regs.point_size;
}
void RasterizerOpenGL::SyncPolygonOffset() {
@@ -1118,17 +1153,10 @@ void RasterizerOpenGL::SyncPolygonOffset() {
state.polygon_offset.clamp = regs.polygon_offset_clamp;
}
void RasterizerOpenGL::SyncAlphaTest() {
void RasterizerOpenGL::CheckAlphaTests() {
const auto& regs = system.GPU().Maxwell3D().regs;
UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
"Alpha Testing is enabled with more than one rendertarget");
state.alpha_test.enabled = regs.alpha_test_enabled;
if (!state.alpha_test.enabled) {
return;
}
state.alpha_test.func = MaxwellToGL::ComparisonOp(regs.alpha_test_func);
state.alpha_test.ref = regs.alpha_test_ref;
}
} // namespace OpenGL

View File

@@ -23,6 +23,7 @@
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_sampler_cache.h"
@@ -47,8 +48,7 @@ struct FramebufferCacheKey;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public:
explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info);
explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info);
~RasterizerOpenGL() override;
void DrawArrays() override;
@@ -106,16 +106,16 @@ private:
/// Configures the current constbuffers to use for the draw command.
void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings);
BaseBindings base_bindings);
/// Configures the current global memory entries to use for the draw command.
void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLenum primitive_mode,
BaseBindings base_bindings);
const Shader& shader, BaseBindings base_bindings);
/// Configures the current textures to use for the draw command.
void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings);
/// Configures the current textures to use for the draw command. Returns shaders texture buffer
/// usage.
TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, BaseBindings base_bindings);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state);
@@ -166,8 +166,8 @@ private:
/// Syncs the polygon offsets
void SyncPolygonOffset();
/// Syncs the alpha test state to match the guest state
void SyncAlphaTest();
/// Check asserts for alpha testing.
void CheckAlphaTests();
/// Check for extension that are not strictly required
/// but are needed for correct emulation
@@ -196,6 +196,7 @@ private:
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
PrimitiveAssembler primitive_assembler{buffer_cache};
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};

View File

@@ -140,7 +140,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
if (!params.is_tiled) {
if (config.tic.IsLineal()) {
params.pitch = config.tic.Pitch();
}
params.unaligned_height = config.tic.Height();
@@ -149,6 +149,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
switch (params.target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::TextureBuffer:
case SurfaceTarget::Texture2D:
params.depth = 1;
break;
@@ -389,6 +390,8 @@ static GLenum SurfaceTargetToGL(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
return GL_TEXTURE_1D;
case SurfaceTarget::TextureBuffer:
return GL_TEXTURE_BUFFER;
case SurfaceTarget::Texture2D:
return GL_TEXTURE_2D;
case SurfaceTarget::Texture3D:
@@ -600,29 +603,35 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
switch (params.target) {
case SurfaceTarget::Texture1D:
glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width);
glTextureStorage1D(texture.handle, params.max_mip_level, gl_internal_format, width);
break;
case SurfaceTarget::TextureBuffer:
texture_buffer.Create();
glNamedBufferStorage(texture_buffer.handle,
params.width * GetBytesPerPixel(params.pixel_format), nullptr,
GL_DYNAMIC_STORAGE_BIT);
glTextureBuffer(texture.handle, gl_internal_format, texture_buffer.handle);
break;
case SurfaceTarget::Texture2D:
case SurfaceTarget::TextureCubemap:
glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height);
glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height);
break;
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height, params.depth);
glTextureStorage3D(texture.handle, params.max_mip_level, gl_internal_format, width, height,
params.depth);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height);
glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height);
}
ApplyTextureDefaults(texture.handle, params.max_mip_level);
if (params.target != SurfaceTarget::TextureBuffer) {
ApplyTextureDefaults(texture.handle, params.max_mip_level);
}
OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
}
@@ -785,6 +794,13 @@ void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_t
glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::TextureBuffer:
ASSERT(mip_map == 0);
glNamedBufferSubData(texture_buffer.handle, x0,
static_cast<GLsizeiptr>(rect.GetWidth()) *
GetBytesPerPixel(params.pixel_format),
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::Texture2D:
glTextureSubImage2D(texture.handle, mip_map, x0, y0,
static_cast<GLsizei>(rect.GetWidth()),
@@ -860,6 +876,9 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
Tegra::Texture::SwizzleSource swizzle_y,
Tegra::Texture::SwizzleSource swizzle_z,
Tegra::Texture::SwizzleSource swizzle_w) {
if (params.target == SurfaceTarget::TextureBuffer) {
return;
}
const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x);
const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y);
const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z);

View File

@@ -250,6 +250,8 @@ struct SurfaceParams {
switch (target) {
case SurfaceTarget::Texture1D:
return "1D";
case SurfaceTarget::TextureBuffer:
return "Buffer";
case SurfaceTarget::Texture2D:
return "2D";
case SurfaceTarget::Texture3D:
@@ -439,6 +441,7 @@ private:
OGLTexture texture;
OGLTexture discrepant_view;
OGLBuffer texture_buffer;
SurfaceParams params{};
GLenum gl_target{};
GLenum gl_internal_format{};

View File

@@ -2,14 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <mutex>
#include <thread>
#include <boost/functional/hash.hpp>
#include "common/assert.h"
#include "common/hash.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -168,10 +164,13 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr
}
CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
GLenum primitive_mode, bool hint_retrievable = false) {
std::string source = "#version 430 core\n"
"#extension GL_ARB_separate_shader_objects : enable\n\n";
Maxwell::ShaderProgram program_type, const ProgramVariant& variant,
bool hint_retrievable = false) {
auto base_bindings{variant.base_bindings};
const auto primitive_mode{variant.primitive_mode};
const auto texture_buffer_usage{variant.texture_buffer_usage};
std::string source = "#version 430 core\n";
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
for (const auto& cbuf : entries.const_buffers) {
@@ -186,6 +185,18 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
base_bindings.sampler++);
}
for (const auto& image : entries.images) {
source +=
fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++);
}
// Transform 1D textures to texture samplers by declaring its preprocessor macros.
for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) {
if (!texture_buffer_usage.test(i)) {
continue;
}
source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i);
}
if (program_type == Maxwell::ShaderProgram::Geometry) {
const auto [glsl_topology, debug_name, max_vertices] =
@@ -261,20 +272,18 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
shader_length = entries.shader_length;
}
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings) {
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
GLuint handle{};
if (program_type == Maxwell::ShaderProgram::Geometry) {
handle = GetGeometryShader(primitive_mode, base_bindings);
handle = GetGeometryShader(variant);
} else {
const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
const auto [entry, is_cache_miss] = programs.try_emplace(variant);
auto& program = entry->second;
if (is_cache_miss) {
program = TryLoadProgram(primitive_mode, base_bindings);
program = TryLoadProgram(variant);
if (!program) {
program =
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
program = SpecializeShader(code, entries, program_type, variant);
disk_cache.SaveUsage(GetUsage(variant));
}
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
@@ -283,6 +292,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
handle = program->handle;
}
auto base_bindings{variant.base_bindings};
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
@@ -290,43 +300,42 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
return {handle, base_bindings};
}
GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
GLuint CachedShader::GetGeometryShader(const ProgramVariant& variant) {
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(variant);
auto& programs = entry->second;
switch (primitive_mode) {
switch (variant.primitive_mode) {
case GL_POINTS:
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.points, variant);
case GL_LINES:
case GL_LINE_STRIP:
return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.lines, variant);
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.lines_adjacency, variant);
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.triangles, variant);
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.triangles_adjacency, variant);
default:
UNREACHABLE_MSG("Unknown primitive mode.");
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.points, variant);
}
}
GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
GLenum primitive_mode) {
GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program,
const ProgramVariant& variant) {
if (target_program) {
return target_program->handle;
}
const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
target_program = TryLoadProgram(primitive_mode, base_bindings);
const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(variant.primitive_mode);
target_program = TryLoadProgram(variant);
if (!target_program) {
target_program =
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
target_program = SpecializeShader(code, entries, program_type, variant);
disk_cache.SaveUsage(GetUsage(variant));
}
LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
@@ -334,23 +343,24 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
return target_program->handle;
};
CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
BaseBindings base_bindings) const {
const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
const auto found = precompiled_programs.find(GetUsage(variant));
if (found == precompiled_programs.end()) {
return {};
}
return found->second;
}
ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
BaseBindings base_bindings) const {
return {unique_identifier, base_bindings, primitive_mode};
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const {
ShaderDiskCacheUsage usage;
usage.unique_identifier = unique_identifier;
usage.variant = variant;
return usage;
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
Core::Frontend::EmuWindow& emu_window, const Device& device)
: RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {}
const Device& device)
: RasterizerCache{rasterizer}, device{device}, disk_cache{system} {}
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
@@ -358,107 +368,62 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
if (!transferable) {
return;
}
const auto [raws, shader_usages] = *transferable;
const auto [raws, usages] = *transferable;
auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
const auto supported_formats{GetSupportedFormats()};
const auto unspecialized_shaders{
const auto unspecialized{
GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
if (stop_loading) {
if (stop_loading)
return;
}
// Track if precompiled cache was altered during loading to know if we have to serialize the
// virtual precompiled cache file back to the hard drive
bool precompiled_cache_altered = false;
// Inform the frontend about shader build initialization
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size());
}
// Build shaders
if (callback)
callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
for (std::size_t i = 0; i < usages.size(); ++i) {
if (stop_loading)
return;
std::mutex mutex;
std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
std::atomic_bool compilation_failed = false;
const auto& usage{usages[i]};
LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier,
i + 1, usages.size());
const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages,
const ShaderDumpsMap& dumps) {
context->MakeCurrent();
SCOPE_EXIT({ return context->DoneCurrent(); });
const auto& unspec{unspecialized.at(usage.unique_identifier)};
const auto dump_it = dumps.find(usage);
for (std::size_t i = begin; i < end; ++i) {
if (stop_loading || compilation_failed) {
return;
}
const auto& usage{shader_usages[i]};
LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})",
usage.unique_identifier, i, shader_usages.size());
const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
const auto dump{dumps.find(usage)};
CachedProgram shader;
if (dump != dumps.end()) {
// If the shader is dumped, attempt to load it with
shader = GeneratePrecompiledProgram(dump->second, supported_formats);
if (!shader) {
compilation_failed = true;
return;
}
}
CachedProgram shader;
if (dump_it != dumps.end()) {
// If the shader is dumped, attempt to load it with
shader = GeneratePrecompiledProgram(dump_it->second, supported_formats);
if (!shader) {
shader = SpecializeShader(unspecialized.code, unspecialized.entries,
unspecialized.program_type, usage.bindings,
usage.primitive, true);
// Invalidate the precompiled cache if a shader dumped shader was rejected
disk_cache.InvalidatePrecompiled();
precompiled_cache_altered = true;
dumps.clear();
}
std::scoped_lock lock(mutex);
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
shader_usages.size());
}
precompiled_programs.emplace(usage, std::move(shader));
}
};
if (!shader) {
shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
usage.variant, true);
}
precompiled_programs.insert({usage, std::move(shader)});
const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)};
const std::size_t bucket_size{shader_usages.size() / num_workers};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
for (std::size_t i = 0; i < num_workers; ++i) {
const bool is_last_worker = i + 1 == num_workers;
const std::size_t start{bucket_size * i};
const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size};
// On some platforms the shared context has to be created from the GUI thread
contexts[i] = emu_window.CreateSharedContext();
threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps);
}
for (auto& thread : threads) {
thread.join();
}
if (compilation_failed) {
// Invalidate the precompiled cache if a shader dumped shader was rejected
disk_cache.InvalidatePrecompiled();
dumps.clear();
precompiled_cache_altered = true;
return;
}
if (stop_loading) {
return;
if (callback)
callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size());
}
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
// precompiling them
for (std::size_t i = 0; i < shader_usages.size(); ++i) {
const auto& usage{shader_usages[i]};
for (std::size_t i = 0; i < usages.size(); ++i) {
const auto& usage{usages[i]};
if (dumps.find(usage) == dumps.end()) {
const auto& program{precompiled_programs.at(usage)};
const auto& program = precompiled_programs.at(usage);
disk_cache.SaveDump(usage, program->handle);
precompiled_cache_altered = true;
}

View File

@@ -6,6 +6,7 @@
#include <array>
#include <atomic>
#include <bitset>
#include <memory>
#include <set>
#include <tuple>
@@ -24,10 +25,6 @@ namespace Core {
class System;
}
namespace Core::Frontend {
class EmuWindow;
}
namespace OpenGL {
class CachedShader;
@@ -67,8 +64,7 @@ public:
}
/// Gets the GL program handle for the shader
std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings);
std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
private:
// Geometry programs. These are needed because GLSL needs an input topology but it's not
@@ -82,15 +78,14 @@ private:
CachedProgram triangles_adjacency;
};
GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
GLuint GetGeometryShader(const ProgramVariant& variant);
/// Generates a geometry shader or returns one that already exists.
GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
GLenum primitive_mode);
GLuint LazyGeometryProgram(CachedProgram& target_program, const ProgramVariant& variant);
CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
CachedProgram TryLoadProgram(const ProgramVariant& variant) const;
ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
u8* host_ptr{};
VAddr cpu_addr{};
@@ -104,8 +99,8 @@ private:
std::string code;
std::unordered_map<BaseBindings, CachedProgram> programs;
std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
std::unordered_map<ProgramVariant, CachedProgram> programs;
std::unordered_map<ProgramVariant, GeometryPrograms> geometry_programs;
std::unordered_map<u32, GLuint> cbuf_resource_cache;
std::unordered_map<u32, GLuint> gmem_resource_cache;
@@ -115,7 +110,7 @@ private:
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
public:
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
Core::Frontend::EmuWindow& emu_window, const Device& device);
const Device& device);
/// Loads disk cache for the current game
void LoadDiskCache(const std::atomic_bool& stop_loading,
@@ -137,13 +132,13 @@ private:
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
const std::set<GLenum>& supported_formats);
Core::Frontend::EmuWindow& emu_window;
const Device& device;
ShaderDiskCacheOpenGL disk_cache;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
ShaderDiskCacheOpenGL disk_cache;
PrecompiledShaders precompiled_shaders;
PrecompiledPrograms precompiled_programs;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
};
} // namespace OpenGL

File diff suppressed because it is too large Load Diff

View File

@@ -27,6 +27,7 @@ struct ShaderEntries;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ProgramResult = std::pair<std::string, ShaderEntries>;
using SamplerEntry = VideoCommon::Shader::Sampler;
using ImageEntry = VideoCommon::Shader::Image;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
@@ -74,6 +75,7 @@ struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
std::vector<SamplerEntry> samplers;
std::vector<SamplerEntry> bindless_samplers;
std::vector<ImageEntry> images;
std::vector<GlobalMemoryEntry> global_memory_entries;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};

View File

@@ -34,11 +34,11 @@ enum class PrecompiledEntryKind : u32 {
Dump,
};
constexpr u32 NativeVersion = 1;
constexpr u32 NativeVersion = 3;
// Making sure sizes doesn't change by accident
static_assert(sizeof(BaseBindings) == 12);
static_assert(sizeof(ShaderDiskCacheUsage) == 24);
static_assert(sizeof(BaseBindings) == 16);
static_assert(sizeof(ShaderDiskCacheUsage) == 40);
namespace {
@@ -104,9 +104,8 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
return true;
}
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
: system{system}, precompiled_cache_virtual_file_offset{0} {}
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
ShaderDiskCacheOpenGL::LoadTransferable() {
@@ -183,7 +182,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
return {{raws, usages}};
}
std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>
std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
ShaderDiskCacheOpenGL::LoadPrecompiled() {
if (!IsUsable())
return {};
@@ -207,7 +207,8 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
return *result;
}
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>>
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize());
@@ -228,7 +229,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
}
std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
ShaderDumpsMap dumps;
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
PrecompiledEntryKind kind{};
if (!LoadObjectFromPrecompiled(kind)) {
@@ -242,7 +243,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
auto entry = LoadDecompiledEntry();
const auto entry = LoadDecompiledEntry();
if (!entry) {
return {};
}
@@ -285,82 +286,97 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
if (!LoadObjectFromPrecompiled(code_size)) {
return {};
}
std::string code(code_size, '\0');
std::vector<u8> code(code_size);
if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
return {};
}
ShaderDiskCacheDecompiled entry;
entry.code = std::move(code);
entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
u32 const_buffers_count{};
if (!LoadObjectFromPrecompiled(const_buffers_count)) {
return {};
}
for (u32 i = 0; i < const_buffers_count; ++i) {
u32 max_offset{};
u32 index{};
bool is_indirect{};
u8 is_indirect{};
if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(is_indirect)) {
return {};
}
entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index);
entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
}
u32 samplers_count{};
if (!LoadObjectFromPrecompiled(samplers_count)) {
return {};
}
for (u32 i = 0; i < samplers_count; ++i) {
u64 offset{};
u64 index{};
u32 type{};
bool is_array{};
bool is_shadow{};
bool is_bindless{};
u8 is_array{};
u8 is_shadow{};
u8 is_bindless{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
!LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
return {};
}
entry.entries.samplers.emplace_back(
entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
static_cast<std::size_t>(index),
static_cast<Tegra::Shader::TextureType>(type),
is_array != 0, is_shadow != 0, is_bindless != 0);
}
u32 images_count{};
if (!LoadObjectFromPrecompiled(images_count)) {
return {};
}
for (u32 i = 0; i < images_count; ++i) {
u64 offset{};
u64 index{};
u32 type{};
u8 is_bindless{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) {
return {};
}
entry.entries.images.emplace_back(
static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0);
}
u32 global_memory_count{};
if (!LoadObjectFromPrecompiled(global_memory_count)) {
return {};
}
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
bool is_read{};
bool is_written{};
u8 is_read{};
u8 is_written{};
if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
!LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
return {};
}
entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
is_written);
entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
is_written != 0);
}
for (auto& clip_distance : entry.entries.clip_distances) {
if (!LoadObjectFromPrecompiled(clip_distance)) {
u8 clip_distance_raw{};
if (!LoadObjectFromPrecompiled(clip_distance_raw))
return {};
}
clip_distance = clip_distance_raw != 0;
}
u64 shader_length{};
if (!LoadObjectFromPrecompiled(shader_length)) {
return {};
}
entry.entries.shader_length = static_cast<std::size_t>(shader_length);
return entry;
@@ -381,7 +397,7 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
for (const auto& cbuf : entries.const_buffers) {
if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
!SaveObjectToPrecompiled(cbuf.IsIndirect())) {
!SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
return false;
}
}
@@ -393,9 +409,21 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
!SaveObjectToPrecompiled(sampler.IsArray()) ||
!SaveObjectToPrecompiled(sampler.IsShadow()) ||
!SaveObjectToPrecompiled(sampler.IsBindless())) {
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
return false;
}
}
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
return false;
}
for (const auto& image : entries.images) {
if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
!SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0))) {
return false;
}
}
@@ -406,13 +434,14 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
for (const auto& gmem : entries.global_memory_entries) {
if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
!SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
!SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
!SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
return false;
}
}
for (const bool clip_distance : entries.clip_distances) {
if (!SaveObjectToPrecompiled(clip_distance)) {
if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
return false;
}
}

View File

@@ -33,19 +33,18 @@ namespace OpenGL {
using ProgramCode = std::vector<u64>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ShaderDiskCacheUsage;
struct ShaderDiskCacheDump;
using TextureBufferUsage = std::bitset<64>;
using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
/// Allocated bindings used by an OpenGL shader program
/// Allocated bindings used by an OpenGL shader program.
struct BaseBindings {
u32 cbuf{};
u32 gmem{};
u32 sampler{};
u32 image{};
bool operator==(const BaseBindings& rhs) const {
return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
return std::tie(cbuf, gmem, sampler, image) ==
std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image);
}
bool operator!=(const BaseBindings& rhs) const {
@@ -53,15 +52,29 @@ struct BaseBindings {
}
};
/// Describes how a shader is used
/// Describes the different variants a single program can be compiled.
struct ProgramVariant {
BaseBindings base_bindings;
GLenum primitive_mode{};
TextureBufferUsage texture_buffer_usage{};
bool operator==(const ProgramVariant& rhs) const {
return std::tie(base_bindings, primitive_mode, texture_buffer_usage) ==
std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage);
}
bool operator!=(const ProgramVariant& rhs) const {
return !operator==(rhs);
}
};
/// Describes how a shader is used.
struct ShaderDiskCacheUsage {
u64 unique_identifier{};
BaseBindings bindings;
GLenum primitive{};
ProgramVariant variant;
bool operator==(const ShaderDiskCacheUsage& rhs) const {
return std::tie(unique_identifier, bindings, primitive) ==
std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive);
return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant);
}
bool operator!=(const ShaderDiskCacheUsage& rhs) const {
@@ -75,16 +88,28 @@ namespace std {
template <>
struct hash<OpenGL::BaseBindings> {
std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
return static_cast<std::size_t>(bindings.cbuf) ^
(static_cast<std::size_t>(bindings.gmem) << 8) ^
(static_cast<std::size_t>(bindings.sampler) << 16) ^
(static_cast<std::size_t>(bindings.image) << 24);
}
};
template <>
struct hash<OpenGL::ProgramVariant> {
std::size_t operator()(const OpenGL::ProgramVariant& variant) const {
return std::hash<OpenGL::BaseBindings>()(variant.base_bindings) ^
std::hash<OpenGL::TextureBufferUsage>()(variant.texture_buffer_usage) ^
(static_cast<std::size_t>(variant.primitive_mode) << 6);
}
};
template <>
struct hash<OpenGL::ShaderDiskCacheUsage> {
std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
return static_cast<std::size_t>(usage.unique_identifier) ^
std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
std::hash<OpenGL::ProgramVariant>()(usage.variant);
}
};
@@ -167,7 +192,6 @@ struct ShaderDiskCacheDump {
class ShaderDiskCacheOpenGL {
public:
explicit ShaderDiskCacheOpenGL(Core::System& system);
~ShaderDiskCacheOpenGL();
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
@@ -265,38 +289,24 @@ private:
return SaveArrayToPrecompiled(&object, 1);
}
bool SaveObjectToPrecompiled(bool object) {
const auto value = static_cast<u8>(object);
return SaveArrayToPrecompiled(&value, 1);
}
template <typename T>
bool LoadObjectFromPrecompiled(T& object) {
return LoadArrayFromPrecompiled(&object, 1);
}
bool LoadObjectFromPrecompiled(bool& object) {
u8 value;
const bool read_ok = LoadArrayFromPrecompiled(&value, 1);
if (!read_ok) {
return false;
}
object = value != 0;
return true;
}
// Core system
Core::System& system;
// Stored transferable shaders
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
// Stores whole precompiled cache which will be read from/saved to the precompiled cache file
// Stores whole precompiled cache which will be read from or saved to the precompiled chache
// file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
// Stores the current offset of the precompiled cache file for IO purposes
std::size_t precompiled_cache_virtual_file_offset = 0;
std::size_t precompiled_cache_virtual_file_offset;
// Stored transferable shaders
std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
// The cache has been loaded at boot
bool tried_to_load{};
};
} // namespace OpenGL
} // namespace OpenGL

View File

@@ -19,7 +19,8 @@ static constexpr u32 PROGRAM_OFFSET{10};
ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: VS" + id + "\n\n";
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "// Shader Unique Id: VS" + id + "\n\n";
out += GetCommonDeclarations();
out += R"(
@@ -28,17 +29,18 @@ layout (location = 0) out vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;
};
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
out += program.first;
if (setup.IsDualProgram()) {
const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
ProgramResult program_b =
Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
@@ -74,13 +76,14 @@ void main() {
}
})";
return {std::move(out), std::move(program.second)};
return {out, program.second};
}
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: GS" + id + "\n\n";
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "// Shader Unique Id: GS" + id + "\n\n";
out += GetCommonDeclarations();
out += R"(
@@ -90,10 +93,11 @@ layout (location = 0) out vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;
};
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
out += program.first;
@@ -103,13 +107,14 @@ void main() {
execute_geometry();
};)";
return {std::move(out), std::move(program.second)};
return {out, program.second};
}
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: FS" + id + "\n\n";
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "// Shader Unique Id: FS" + id + "\n\n";
out += GetCommonDeclarations();
out += R"(
@@ -127,10 +132,35 @@ layout (location = 0) in noperspective vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;
};
bool AlphaFunc(in float value) {
float ref = uintBitsToFloat(alpha_test[2]);
switch (alpha_test[1]) {
case 1:
return false;
case 2:
return value < ref;
case 3:
return value == ref;
case 4:
return value <= ref;
case 5:
return value > ref;
case 6:
return value != ref;
case 7:
return value >= ref;
case 8:
return true;
default:
return false;
}
}
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
@@ -142,7 +172,7 @@ void main() {
}
)";
return {std::move(out), std::move(program.second)};
return {out, program.second};
}
} // namespace OpenGL::GLShader

View File

@@ -48,6 +48,17 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shade
viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
auto func{static_cast<u32>(regs.alpha_test_func)};
// Normalize the gl variants of opCompare to be the same as the normal variants
const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
if (func >= op_gl_variant_base) {
func = func - op_gl_variant_base + 1U;
}
alpha_test.enabled = regs.alpha_test_enabled;
alpha_test.func = func;
alpha_test.ref = regs.alpha_test_ref;
instance_id = state.current_instance;
// Assign in which stage the position has to be flipped

View File

@@ -27,8 +27,14 @@ struct MaxwellUniformData {
GLuint flip_stage;
GLfloat y_direction;
};
struct alignas(16) {
GLuint enabled;
GLuint func;
GLfloat ref;
GLuint padding;
} alpha_test;
};
static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) == 48, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");

View File

@@ -156,10 +156,6 @@ OpenGLState::OpenGLState() {
polygon_offset.factor = 0.0f;
polygon_offset.units = 0.0f;
polygon_offset.clamp = 0.0f;
alpha_test.enabled = false;
alpha_test.func = GL_ALWAYS;
alpha_test.ref = 0.0f;
}
void OpenGLState::ApplyDefaultState() {
@@ -465,14 +461,6 @@ void OpenGLState::ApplyPolygonOffset() const {
}
}
void OpenGLState::ApplyAlphaTest() const {
Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled);
if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref),
std::tie(alpha_test.func, alpha_test.ref))) {
glAlphaFunc(alpha_test.func, alpha_test.ref);
}
}
void OpenGLState::ApplyTextures() const {
bool has_delta{};
std::size_t first{};
@@ -545,7 +533,6 @@ void OpenGLState::Apply() const {
ApplyTextures();
ApplySamplers();
ApplyPolygonOffset();
ApplyAlphaTest();
}
void OpenGLState::EmulateViewportWithScissor() {

View File

@@ -172,12 +172,6 @@ public:
GLfloat clamp;
} polygon_offset;
struct {
bool enabled; // GL_ALPHA_TEST
GLenum func; // GL_ALPHA_TEST_FUNC
GLfloat ref; // GL_ALPHA_TEST_REF
} alpha_test;
std::array<bool, 8> clip_distance; // GL_CLIP_DISTANCE
OpenGLState();
@@ -221,7 +215,6 @@ public:
void ApplySamplers() const;
void ApplyDepthClamp() const;
void ApplyPolygonOffset() const;
void ApplyAlphaTest() const;
/// Set the initial OpenGL state
static void ApplyDefaultState();

View File

@@ -15,7 +15,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent)
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent,
bool use_persistent)
: buffer_size(size) {
gl_buffer.Create();
@@ -29,7 +30,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
allocate_size *= 2;
}
if (GLAD_GL_ARB_buffer_storage) {
if (use_persistent) {
persistent = true;
coherent = prefer_coherent;
const GLbitfield flags =

View File

@@ -13,7 +13,8 @@ namespace OpenGL {
class OGLStreamBuffer : private NonCopyable {
public:
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false);
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false,
bool use_persistent = true);
~OGLStreamBuffer();
GLuint GetHandle() const;

View File

@@ -126,10 +126,6 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_TRIANGLES;
case Maxwell::PrimitiveTopology::TriangleStrip:
return GL_TRIANGLE_STRIP;
case Maxwell::PrimitiveTopology::TriangleFan:
return GL_TRIANGLE_FAN;
case Maxwell::PrimitiveTopology::Quads:
return GL_QUADS;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
UNREACHABLE();
@@ -175,8 +171,11 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
return GL_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::Border:
return GL_CLAMP_TO_BORDER;
case Tegra::Texture::WrapMode::Clamp:
return GL_CLAMP;
case Tegra::Texture::WrapMode::ClampOGL:
// TODO(Subv): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
// GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to
// manually mix them. However the shader part of this is not yet implemented.
return GL_CLAMP_TO_BORDER;
case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
return GL_MIRROR_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::MirrorOnceBorder:

View File

@@ -97,8 +97,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
return matrix;
}
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
: VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {}
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system)
: VideoCore::RendererBase{window}, system{system} {}
RendererOpenGL::~RendererOpenGL() = default;
@@ -265,7 +265,7 @@ void RendererOpenGL::CreateRasterizer() {
}
// Initialize sRGB Usage
OpenGLState::ClearsRGBUsed();
rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info);
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
@@ -472,7 +472,6 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
}
}
/// Initialize the renderer
bool RendererOpenGL::Init() {
Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};

View File

@@ -45,7 +45,7 @@ struct ScreenInfo {
class RendererOpenGL : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system);
~RendererOpenGL() override;
/// Swap buffers (render frame)
@@ -77,7 +77,6 @@ private:
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
Core::Frontend::EmuWindow& emu_window;
Core::System& system;
OpenGLState state;

View File

@@ -38,27 +38,27 @@ void BindBuffersRangePushBuffer::Bind() const {
sizes.data());
}
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) {
if (!GLAD_GL_KHR_debug) {
// We don't need to throw an error as this is just for debugging
return;
return; // We don't need to throw an error as this is just for debugging
}
const std::string nice_addr = fmt::format("0x{:016x}", addr);
std::string object_label;
if (extra_info.empty()) {
switch (identifier) {
case GL_TEXTURE:
object_label = fmt::format("Texture@0x{:016X}", addr);
object_label = "Texture@" + nice_addr;
break;
case GL_PROGRAM:
object_label = fmt::format("Shader@0x{:016X}", addr);
object_label = "Shader@" + nice_addr;
break;
default:
object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr);
object_label = fmt::format("Object(0x{:x})@{}", identifier, nice_addr);
break;
}
} else {
object_label = fmt::format("{}@0x{:016X}", extra_info, addr);
object_label = extra_info + '@' + nice_addr;
}
glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
}

View File

@@ -4,7 +4,7 @@
#pragma once
#include <string_view>
#include <string>
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
@@ -30,6 +30,6 @@ private:
std::vector<GLsizeiptr> sizes;
};
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = "");
} // namespace OpenGL

View File

@@ -52,7 +52,7 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
return vk::SamplerAddressMode::eClampToEdge;
case Tegra::Texture::WrapMode::Border:
return vk::SamplerAddressMode::eClampToBorder;
case Tegra::Texture::WrapMode::Clamp:
case Tegra::Texture::WrapMode::ClampOGL:
// TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
// eClampToBorder to get the border color of the texture, and then sample the edge to
// manually mix them. However the shader part of this is not yet implemented.

View File

@@ -194,8 +194,8 @@ public:
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler);
}
for (const auto& attribute : ir.GetInputAttributes()) {
entries.attributes.insert(GetGenericAttributeLocation(attribute));
for (const auto& attr : ir.GetInputAttributes()) {
entries.attributes.insert(GetGenericAttributeLocation(attr.first));
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
@@ -321,7 +321,8 @@ private:
}
void DeclareInputAttributes() {
for (const auto index : ir.GetInputAttributes()) {
for (const auto element : ir.GetInputAttributes()) {
const Attribute::Index index = element.first;
if (!IsGenericAttribute(index)) {
continue;
}
@@ -929,6 +930,11 @@ private:
return {};
}
Id ImageStore(Operation operation) {
UNIMPLEMENTED();
return {};
}
Id Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(operation[0]);
UNIMPLEMENTED_IF(!target);
@@ -1035,18 +1041,6 @@ private:
return {};
}
template <u32 element>
Id LocalInvocationId(Operation) {
UNIMPLEMENTED();
return {};
}
template <u32 element>
Id WorkGroupId(Operation) {
UNIMPLEMENTED();
return {};
}
Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
const std::string& name) {
const Id id = OpVariable(type, storage);
@@ -1293,6 +1287,8 @@ private:
&SPIRVDecompiler::TextureQueryLod,
&SPIRVDecompiler::TexelFetch,
&SPIRVDecompiler::ImageStore,
&SPIRVDecompiler::Branch,
&SPIRVDecompiler::PushFlowStack,
&SPIRVDecompiler::PopFlowStack,
@@ -1303,12 +1299,6 @@ private:
&SPIRVDecompiler::EndPrimitive,
&SPIRVDecompiler::YNegate,
&SPIRVDecompiler::LocalInvocationId<0>,
&SPIRVDecompiler::LocalInvocationId<1>,
&SPIRVDecompiler::LocalInvocationId<2>,
&SPIRVDecompiler::WorkGroupId<0>,
&SPIRVDecompiler::WorkGroupId<1>,
&SPIRVDecompiler::WorkGroupId<2>,
};
const ShaderIR& ir;

View File

@@ -168,6 +168,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
{OpCode::Type::Texture, &ShaderIR::DecodeTexture},
{OpCode::Type::Image, &ShaderIR::DecodeImage},
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},

View File

@@ -4,7 +4,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -153,4 +152,4 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -4,7 +4,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"

View File

@@ -4,7 +4,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -48,4 +47,4 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -49,4 +49,4 @@ u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -93,4 +93,4 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation
}
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -46,4 +46,4 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -38,4 +38,4 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -56,4 +56,4 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -55,4 +55,4 @@ u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -53,4 +53,4 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -6,7 +6,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -65,4 +64,4 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -59,4 +59,4 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -0,0 +1,115 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
namespace {
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
switch (image_type) {
case Tegra::Shader::ImageType::Texture1D:
case Tegra::Shader::ImageType::TextureBuffer:
return 1;
case Tegra::Shader::ImageType::Texture1DArray:
case Tegra::Shader::ImageType::Texture2D:
return 2;
case Tegra::Shader::ImageType::Texture2DArray:
case Tegra::Shader::ImageType::Texture3D:
return 3;
}
UNREACHABLE();
return 1;
}
} // Anonymous namespace
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
switch (opcode->get().GetId()) {
case OpCode::Id::SUST: {
UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P);
UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer);
UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore);
UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store
std::vector<Node> values;
constexpr std::size_t hardcoded_size{4};
for (std::size_t i = 0; i < hardcoded_size; ++i) {
values.push_back(GetRegister(instr.gpr0.Value() + i));
}
std::vector<Node> coords;
const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
for (std::size_t i = 0; i < num_coords; ++i) {
coords.push_back(GetRegister(instr.gpr8.Value() + i));
}
const auto type{instr.sust.image_type};
const auto& image{instr.sust.is_immediate ? GetImage(instr.image, type)
: GetBindlessImage(instr.gpr39, type)};
MetaImage meta{image, values};
const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))};
bb.push_back(store);
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
}
return pc;
}
const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
const auto offset{static_cast<std::size_t>(image.index.Value())};
// If this image has already been used, return the existing mapping.
const auto itr{std::find_if(used_images.begin(), used_images.end(),
[=](const Image& entry) { return entry.GetOffset() == offset; })};
if (itr != used_images.end()) {
ASSERT(itr->GetType() == type);
return *itr;
}
// Otherwise create a new mapping for this image.
const std::size_t next_index{used_images.size()};
const Image entry{offset, next_index, type};
return *used_images.emplace(entry).first;
}
const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
Tegra::Shader::ImageType type) {
const Node image_register{GetRegister(reg)};
const Node base_image{
TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
const auto cbuf{std::get_if<CbufNode>(base_image)};
const auto cbuf_offset_imm{std::get_if<ImmediateNode>(cbuf->GetOffset())};
const auto cbuf_offset{cbuf_offset_imm->GetValue()};
const auto cbuf_index{cbuf->GetIndex()};
const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
// If this image has already been used, return the existing mapping.
const auto itr{std::find_if(used_images.begin(), used_images.end(),
[=](const Image& entry) { return entry.GetOffset() == cbuf_key; })};
if (itr != used_images.end()) {
ASSERT(itr->GetType() == type);
return *itr;
}
// Otherwise create a new mapping for this image.
const std::size_t next_index{used_images.size()};
const Image entry{cbuf_index, cbuf_offset, next_index, type};
return *used_images.emplace(entry).first;
}
} // namespace VideoCommon::Shader

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -46,4 +47,4 @@ u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -50,4 +50,4 @@ u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -47,20 +47,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
"Indirect attribute loads are not supported");
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
"Non-32 bits PHYS reads are not implemented");
const Node buffer{GetRegister(instr.gpr39)};
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
Tegra::Shader::IpaSampleMode::Default};
u64 next_element = instr.attribute.fmt20.element;
auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
const auto LoadNextElement = [&](u32 reg_offset) {
const Node attribute{instr.attribute.fmt20.IsPhysical()
? GetPhysicalInputAttribute(instr.gpr8, buffer)
: GetInputAttribute(static_cast<Attribute::Index>(next_index),
next_element, buffer)};
const Node buffer = GetRegister(instr.gpr39);
const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
next_element, input_mode, buffer);
SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
@@ -146,25 +143,12 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
case OpCode::Id::LD:
case OpCode::Id::LDG: {
const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
switch (opcode->get().GetId()) {
case OpCode::Id::LD:
UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
return instr.generic.type;
case OpCode::Id::LDG:
return instr.ldg.type;
default:
UNREACHABLE();
return {};
}
}();
const auto [real_address_base, base_address, descriptor] =
TrackAndGetGlobalMemory(bb, instr, false);
TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
static_cast<u32>(instr.ldg.immediate_offset.Value()), false);
const u32 count = GetUniformTypeElementsCount(type);
const u32 count = GetUniformTypeElementsCount(instr.ldg.type);
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
@@ -178,6 +162,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
case OpCode::Id::STG: {
const auto [real_address_base, base_address, descriptor] =
TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
static_cast<u32>(instr.stg.immediate_offset.Value()), true);
// Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
SetTemporal(bb, 0, real_address_base);
const u32 count = GetUniformTypeElementsCount(instr.stg.type);
for (u32 i = 0; i < count; ++i) {
SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
}
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
}
break;
}
case OpCode::Id::ST_A: {
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
"Indirect attribute loads are not supported");
@@ -233,56 +239,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
case OpCode::Id::ST:
case OpCode::Id::STG: {
const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
switch (opcode->get().GetId()) {
case OpCode::Id::ST:
UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
return instr.generic.type;
case OpCode::Id::STG:
return instr.stg.type;
default:
UNREACHABLE();
return {};
}
}();
const auto [real_address_base, base_address, descriptor] =
TrackAndGetGlobalMemory(bb, instr, true);
// Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
SetTemporal(bb, 0, real_address_base);
const u32 count = GetUniformTypeElementsCount(type);
for (u32 i = 0; i < count; ++i) {
SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
}
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
}
break;
}
case OpCode::Id::AL2P: {
// Ignore al2p.direction since we don't care about it.
// Calculate emulation fake physical address.
const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
const Node reg{GetRegister(instr.gpr8)};
const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
// Set the fake address to target register.
SetRegister(bb, instr.gpr0, fake_address);
// Signal the shader IR to declare all possible attributes and varyings
uses_physical_attributes = true;
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
}
@@ -291,11 +247,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
Instruction instr,
Node addr_register,
u32 immediate_offset,
bool is_write) {
const auto addr_register{GetRegister(instr.gmem.gpr)};
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
const Node base_address{
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
const auto cbuf = std::get_if<CbufNode>(base_address);

View File

@@ -4,7 +4,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -14,7 +13,6 @@ using Tegra::Shader::ConditionCode;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
using Tegra::Shader::SystemVariable;
u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
@@ -60,33 +58,20 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::MOV_SYS: {
const Node value = [&]() {
switch (instr.sys20) {
case SystemVariable::Ydirection:
return Operation(OperationCode::YNegate);
case SystemVariable::InvocationInfo:
LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
return Immediate(0u);
case SystemVariable::TidX:
return Operation(OperationCode::LocalInvocationIdX);
case SystemVariable::TidY:
return Operation(OperationCode::LocalInvocationIdY);
case SystemVariable::TidZ:
return Operation(OperationCode::LocalInvocationIdZ);
case SystemVariable::CtaIdX:
return Operation(OperationCode::WorkGroupIdX);
case SystemVariable::CtaIdY:
return Operation(OperationCode::WorkGroupIdY);
case SystemVariable::CtaIdZ:
return Operation(OperationCode::WorkGroupIdZ);
default:
UNIMPLEMENTED_MSG("Unhandled system move: {}",
static_cast<u32>(instr.sys20.Value()));
return Immediate(0u);
}
}();
SetRegister(bb, instr.gpr0, value);
switch (instr.sys20) {
case Tegra::Shader::SystemVariable::InvocationInfo: {
LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
SetRegister(bb, instr.gpr0, Immediate(0u));
break;
}
case Tegra::Shader::SystemVariable::Ydirection: {
// Config pack's third value is Y_NEGATE's state.
SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate));
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value()));
}
break;
}
case OpCode::Id::BRA: {
@@ -145,18 +130,15 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::IPA: {
const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
const auto attribute = instr.attribute.fmt28;
const auto& attribute = instr.attribute.fmt28;
const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
instr.ipa.sample_mode.Value()};
Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
: GetInputAttribute(attribute.index, attribute.element);
const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
Node value = attr;
const Tegra::Shader::Attribute::Index index = attribute.index.Value();
const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
index <= Tegra::Shader::Attribute::Index::Attribute_31;
if (is_generic || is_physical) {
if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
index <= Tegra::Shader::Attribute::Index::Attribute_31) {
// TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
// In theory by setting them as perspective, OpenGL does the perspective correction.
// A way must figured to reverse the last step of it.

View File

@@ -64,4 +64,4 @@ u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -43,4 +43,4 @@ u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -48,4 +48,4 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

Some files were not shown because too many files have changed in this diff Show More