Compare commits

...

12 Commits

Author SHA1 Message Date
Subv
7e5e4f8d7a FileSys: Append the requested path to the filesystem base path in DeleteFile.
We were trying to delete things in the current directory instead of the actual filesystem directory. This may fix some savedata issues in some games.
2018-07-14 10:57:22 -05:00
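A minimal sketch of the fix (mirroring the Disk_FileSystem diff further down; base_directory is the backing directory of the emulated filesystem):

    ResultCode Disk_FileSystem::DeleteFile(const std::string& path) const {
        // Resolve the guest-requested path against the filesystem base path instead of
        // treating it as relative to the emulator's working directory.
        const std::string full_path = base_directory + path;
        if (!FileUtil::Exists(full_path)) {
            return ERROR_PATH_NOT_FOUND;
        }
        FileUtil::Delete(full_path);
        return RESULT_SUCCESS;
    }
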
bunnei
81739a5448 Merge pull request #660 from Subv/depth_write
GPU: Always enable the depth write when clearing the depth buffer.
2018-07-14 00:38:12 -07:00
Subv
b37354cca8 GPU: Always enable the depth write when clearing the depth buffer.
The GPU ignores that register when clearing, but OpenGL obeys the glDepthMask parameter, so we set the depth mask to GL_TRUE when clearing the depth buffer. It will be restored to the correct value automatically on the next draw call.
2018-07-14 00:52:23 -05:00
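In plain OpenGL terms the workaround looks roughly like this (a sketch only; the actual change goes through the cached driver state shown in the gl_rasterizer diff, state.depth.* plus state.Apply()):

    // The emulated GPU ignores the depth write mask while clearing, but glClear honors
    // glDepthMask, so force depth writes on just for the clear.
    glEnable(GL_DEPTH_TEST);
    glDepthFunc(GL_ALWAYS);
    glDepthMask(GL_TRUE);
    glClear(GL_DEPTH_BUFFER_BIT);
    // The tracked depth state is re-applied with the game's values on the next draw call.
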
bunnei
9fc0d1d701 Merge pull request #657 from bunnei/dual-vs
gl_shader_gen: Implement dual vertex shader mode.
2018-07-13 07:08:54 -07:00
Hedges
e066bc75b9 More improvements to GDBStub (#653)
* More improvements to GDBStub
- Debugging of threads should work correctly, with source- and assembly-level stepping and modification of registers and memory, meaning threads and call stacks are fully clickable in VS.
- The list of modules is available to the client, with the assumption that .nro and .nso files are backed by an .elf with symbols, while deconstructed ROMs keep N names.
- Initial support for floating point registers.

* Tidy up as requested in PR feedback

* Tidy up as requested in PR feedback
2018-07-12 20:22:59 -07:00
bunnei
8aeff9cf8e gl_rasterizer: Fix check for if a shader stage is enabled. 2018-07-12 22:57:57 -04:00
bunnei
c4015cd93a gl_shader_gen: Implement dual vertex shader mode.
- When the VertexA shader stage is enabled, we combine it with the VertexB program to produce a single Vertex Shader stage.
2018-07-12 22:25:36 -04:00
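A rough sketch of the rasterizer-side handling, based on the gl_rasterizer diff below (GetShaderProgramCode is the helper introduced there):

    case Maxwell::ShaderProgram::VertexA: {
        // Host hardware has no separate VertexA stage, so attach VertexB's code as
        // "program B" and decompile both programs into a single GLSL vertex shader.
        setup.SetProgramB(GetShaderProgramCode(Maxwell::ShaderProgram::VertexB));
        GLShader::MaxwellVSConfig vs_config{setup};
        shader_resources = shader_program_manager->UseProgrammableVertexShader(vs_config, setup);
        break;
    }
    // ...later in the same loop: VertexB has already been consumed, so skip its iteration.
    if (program == Maxwell::ShaderProgram::VertexA) {
        ++index;
    }
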
bunnei
ce23ae3ede Merge pull request #656 from ogniK5377/audren-mem-init
Initialized memory for RequestUpdateAudioRenderer and fixed MemoryPoolSection to be more accurate
2018-07-12 19:12:47 -07:00
bunnei
64b5e5d5d9 Merge pull request #655 from bunnei/pred-lt-nan
gl_shader_decompiler: Implement PredCondition::LessThanWithNan.
2018-07-12 18:59:15 -07:00
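For context: LessThanWithNan behaves like an ordinary less-than except that the predicate is also taken when either operand is NaN. The decompiler builds the comparison expression roughly as follows (see the gl_shader_decompiler diff below):

    std::string predicate = '(' + op_a + ") < (" + op_b + ')';
    if (condition == PredCondition::LessThanWithNan) {
        // A plain GLSL '<' is false when either operand is NaN, so append explicit isnan() checks.
        predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
    }
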
bunnei
52636f67cc Merge pull request #654 from bunnei/cond-exit
gl_shader_decompiler: Use FlowCondition field in EXIT instruction.
2018-07-12 18:59:05 -07:00
bunnei
49c0c081c4 gl_shader_decompiler: Implement PredCondition::LessThanWithNan. 2018-07-12 20:04:35 -04:00
bunnei
4757ffdcce gl_shader_decompiler: Use FlowCondition field in EXIT instruction. 2018-07-12 20:00:37 -04:00
17 changed files with 378 additions and 160 deletions

View File

@@ -134,7 +134,7 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _
size_t dir_end = full_path.find_last_of("/"
// windows needs the : included for something like just "C:" to be considered a directory
#ifdef _WIN32
":"
"\\:"
#endif
);
if (std::string::npos == dir_end)
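In effect the separator set searched on Windows becomes '/', '\', and ':' (the adjacent string literals concatenate across the #ifdef), so backslash-separated paths are now split correctly; the ':' was already included so that a bare drive such as "C:" counts as a directory. A hedged sketch of the resulting call:

    const size_t dir_end = full_path.find_last_of("/\\:"); // Windows build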

View File

@@ -193,11 +193,11 @@ void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
}
Kernel::Thread* thread = Kernel::GetCurrentThread();
SaveContext(thread->context);
if (last_bkpt_hit) {
if (last_bkpt_hit || (num_instructions == 1)) {
last_bkpt_hit = false;
GDBStub::Break();
GDBStub::SendTrap(thread, 5);
}
GDBStub::SendTrap(thread, 5);
}
}

View File

@@ -58,11 +58,13 @@ ResultVal<std::unique_ptr<StorageBackend>> Disk_FileSystem::OpenFile(const std::
}
ResultCode Disk_FileSystem::DeleteFile(const std::string& path) const {
if (!FileUtil::Exists(path)) {
std::string full_path = base_directory + path;
if (!FileUtil::Exists(full_path)) {
return ERROR_PATH_NOT_FOUND;
}
FileUtil::Delete(path);
FileUtil::Delete(full_path);
return RESULT_SUCCESS;
}

View File

@@ -61,10 +61,16 @@ const u32 SIGTERM = 15;
const u32 MSG_WAITALL = 8;
#endif
const u32 X30_REGISTER = 30;
const u32 LR_REGISTER = 30;
const u32 SP_REGISTER = 31;
const u32 PC_REGISTER = 32;
const u32 CPSR_REGISTER = 33;
const u32 UC_ARM64_REG_Q0 = 34;
const u32 FPSCR_REGISTER = 66;
// TODO/WiP - Used while working on support for FPU
const u32 TODO_DUMMY_REG_997 = 997;
const u32 TODO_DUMMY_REG_998 = 998;
// For sample XML files see the GDB source /gdb/features
// GDB also wants the l character at the start
@@ -130,6 +136,8 @@ static const char* target_xml =
</flags>
<reg name="cpsr" bitsize="32" type="cpsr_flags"/>
</feature>
<feature name="org.gnu.gdb.aarch64.fpu">
</feature>
</target>
)";
@@ -144,6 +152,7 @@ static u32 latest_signal = 0;
static bool memory_break = false;
static Kernel::Thread* current_thread = nullptr;
static u32 current_core = 0;
// Binding to a port within the reserved ports range (0-1023) requires root permissions,
// so default to a port outside of that range.
@@ -171,13 +180,34 @@ static std::map<u64, Breakpoint> breakpoints_execute;
static std::map<u64, Breakpoint> breakpoints_read;
static std::map<u64, Breakpoint> breakpoints_write;
struct Module {
std::string name;
PAddr beg;
PAddr end;
};
static std::vector<Module> modules;
void RegisterModule(std::string name, PAddr beg, PAddr end, bool add_elf_ext) {
Module module;
if (add_elf_ext) {
Common::SplitPath(name, nullptr, &module.name, nullptr);
module.name += ".elf";
} else {
module.name = std::move(name);
}
module.beg = beg;
module.end = end;
modules.push_back(std::move(module));
}
static Kernel::Thread* FindThreadById(int id) {
for (int core = 0; core < Core::NUM_CPU_CORES; core++) {
auto threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
for (auto thread : threads) {
for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) {
const auto& threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
for (auto& thread : threads) {
if (thread->GetThreadId() == id) {
current_thread = thread.get();
return current_thread;
current_core = core;
return thread.get();
}
}
}
@@ -197,6 +227,8 @@ static u64 RegRead(int id, Kernel::Thread* thread = nullptr) {
return thread->context.pc;
} else if (id == CPSR_REGISTER) {
return thread->context.cpsr;
} else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) {
return thread->context.fpu_registers[id - UC_ARM64_REG_Q0][0];
} else {
return 0;
}
@@ -215,6 +247,8 @@ static void RegWrite(int id, u64 val, Kernel::Thread* thread = nullptr) {
thread->context.pc = val;
} else if (id == CPSR_REGISTER) {
thread->context.cpsr = val;
} else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) {
thread->context.fpu_registers[id - (CPSR_REGISTER + 1)][0] = val;
}
}
@@ -534,7 +568,11 @@ static void HandleQuery() {
SendReply("T0");
} else if (strncmp(query, "Supported", strlen("Supported")) == 0) {
// PacketSize needs to be large enough for target xml
SendReply("PacketSize=2000;qXfer:features:read+");
std::string buffer = "PacketSize=2000;qXfer:features:read+;qXfer:threads:read+";
if (!modules.empty()) {
buffer += ";qXfer:libraries:read+";
}
SendReply(buffer.c_str());
} else if (strncmp(query, "Xfer:features:read:target.xml:",
strlen("Xfer:features:read:target.xml:")) == 0) {
SendReply(target_xml);
@@ -543,9 +581,9 @@ static void HandleQuery() {
SendReply(buffer.c_str());
} else if (strncmp(query, "fThreadInfo", strlen("fThreadInfo")) == 0) {
std::string val = "m";
for (int core = 0; core < Core::NUM_CPU_CORES; core++) {
auto threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
for (auto thread : threads) {
for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) {
const auto& threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
for (const auto& thread : threads) {
val += fmt::format("{:x}", thread->GetThreadId());
val += ",";
}
@@ -554,6 +592,31 @@ static void HandleQuery() {
SendReply(val.c_str());
} else if (strncmp(query, "sThreadInfo", strlen("sThreadInfo")) == 0) {
SendReply("l");
} else if (strncmp(query, "Xfer:threads:read", strlen("Xfer:threads:read")) == 0) {
std::string buffer;
buffer += "l<?xml version=\"1.0\"?>";
buffer += "<threads>";
for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) {
const auto& threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
for (const auto& thread : threads) {
buffer +=
fmt::format(R"*(<thread id="{:x}" core="{:d}" name="Thread {:x}"></thread>)*",
thread->GetThreadId(), core, thread->GetThreadId());
}
}
buffer += "</threads>";
SendReply(buffer.c_str());
} else if (strncmp(query, "Xfer:libraries:read", strlen("Xfer:libraries:read")) == 0) {
std::string buffer;
buffer += "l<?xml version=\"1.0\"?>";
buffer += "<library-list>";
for (const auto& module : modules) {
buffer +=
fmt::format(R"*("<library name = "{}"><segment address = "0x{:x}"/></library>)*",
module.name, module.beg);
}
buffer += "</library-list>";
SendReply(buffer.c_str());
} else {
SendReply("");
}
@@ -561,33 +624,27 @@ static void HandleQuery() {
/// Handle set thread command from gdb client.
static void HandleSetThread() {
if (memcmp(command_buffer, "Hc", 2) == 0 || memcmp(command_buffer, "Hg", 2) == 0) {
int thread_id = -1;
if (command_buffer[2] != '-') {
thread_id = static_cast<int>(HexToInt(
command_buffer + 2,
command_length - 2 /*strlen(reinterpret_cast<char*>(command_buffer) + 2)*/));
}
if (thread_id >= 1) {
current_thread = FindThreadById(thread_id);
}
if (!current_thread) {
thread_id = 1;
current_thread = FindThreadById(thread_id);
}
if (current_thread) {
SendReply("OK");
return;
}
int thread_id = -1;
if (command_buffer[2] != '-') {
thread_id = static_cast<int>(HexToInt(command_buffer + 2, command_length - 2));
}
if (thread_id >= 1) {
current_thread = FindThreadById(thread_id);
}
if (!current_thread) {
thread_id = 1;
current_thread = FindThreadById(thread_id);
}
if (current_thread) {
SendReply("OK");
return;
}
SendReply("E01");
}
/// Handle thread alive command from gdb client.
static void HandleThreadAlive() {
int thread_id = static_cast<int>(
HexToInt(command_buffer + 1,
command_length - 1 /*strlen(reinterpret_cast<char*>(command_buffer) + 1)*/));
int thread_id = static_cast<int>(HexToInt(command_buffer + 1, command_length - 1));
if (thread_id == 0) {
thread_id = 1;
}
@@ -610,16 +667,23 @@ static void SendSignal(Kernel::Thread* thread, u32 signal, bool full = true) {
latest_signal = signal;
std::string buffer;
if (full) {
buffer = fmt::format("T{:02x}{:02x}:{:016x};{:02x}:{:016x};", latest_signal, PC_REGISTER,
Common::swap64(RegRead(PC_REGISTER, thread)), SP_REGISTER,
Common::swap64(RegRead(SP_REGISTER, thread)));
} else {
buffer = fmt::format("T{:02x};", latest_signal);
if (!thread) {
full = false;
}
buffer += fmt::format("thread:{:x};", thread->GetThreadId());
std::string buffer;
if (full) {
buffer = fmt::format("T{:02x}{:02x}:{:016x};{:02x}:{:016x};{:02x}:{:016x}", latest_signal,
PC_REGISTER, Common::swap64(RegRead(PC_REGISTER, thread)), SP_REGISTER,
Common::swap64(RegRead(SP_REGISTER, thread)), LR_REGISTER,
Common::swap64(RegRead(LR_REGISTER, thread)));
} else {
buffer = fmt::format("T{:02x}", latest_signal);
}
if (thread) {
buffer += fmt::format(";thread:{:x};", thread->GetThreadId());
}
SendReply(buffer.c_str());
}
@@ -711,8 +775,12 @@ static void ReadRegister() {
LongToGdbHex(reply, RegRead(id, current_thread));
} else if (id == CPSR_REGISTER) {
IntToGdbHex(reply, (u32)RegRead(id, current_thread));
} else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) {
LongToGdbHex(reply, RegRead(id, current_thread));
} else if (id == FPSCR_REGISTER) {
LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_998, current_thread));
} else {
return SendReply("E01");
LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_997, current_thread));
}
SendReply(reinterpret_cast<char*>(reply));
@@ -729,7 +797,7 @@ static void ReadRegisters() {
LongToGdbHex(bufptr + reg * 16, RegRead(reg, current_thread));
}
bufptr += (32 * 16);
bufptr += 32 * 16;
LongToGdbHex(bufptr, RegRead(PC_REGISTER, current_thread));
@@ -739,6 +807,16 @@ static void ReadRegisters() {
bufptr += 8;
for (int reg = UC_ARM64_REG_Q0; reg <= UC_ARM64_REG_Q0 + 31; reg++) {
LongToGdbHex(bufptr + reg * 16, RegRead(reg, current_thread));
}
bufptr += 32 * 32;
LongToGdbHex(bufptr, RegRead(TODO_DUMMY_REG_998, current_thread));
bufptr += 8;
SendReply(reinterpret_cast<char*>(buffer));
}
@@ -759,10 +837,17 @@ static void WriteRegister() {
RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
} else if (id == CPSR_REGISTER) {
RegWrite(id, GdbHexToInt(buffer_ptr), current_thread);
} else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) {
RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
} else if (id == FPSCR_REGISTER) {
RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr), current_thread);
} else {
return SendReply("E01");
RegWrite(TODO_DUMMY_REG_997, GdbHexToLong(buffer_ptr), current_thread);
}
// Update Unicorn context skipping scheduler, no running threads at this point
Core::System::GetInstance().ArmInterface(current_core).LoadContext(current_thread->context);
SendReply("OK");
}
@@ -773,18 +858,25 @@ static void WriteRegisters() {
if (command_buffer[0] != 'G')
return SendReply("E01");
for (int i = 0, reg = 0; reg <= CPSR_REGISTER; i++, reg++) {
for (int i = 0, reg = 0; reg <= FPSCR_REGISTER; i++, reg++) {
if (reg <= SP_REGISTER) {
RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
} else if (reg == PC_REGISTER) {
RegWrite(PC_REGISTER, GdbHexToLong(buffer_ptr + i * 16), current_thread);
} else if (reg == CPSR_REGISTER) {
RegWrite(CPSR_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread);
} else if (reg >= UC_ARM64_REG_Q0 && reg < FPSCR_REGISTER) {
RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
} else if (reg == FPSCR_REGISTER) {
RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr + i * 16), current_thread);
} else {
UNIMPLEMENTED();
}
}
// Update Unicorn context skipping scheduler, no running threads at this point
Core::System::GetInstance().ArmInterface(current_core).LoadContext(current_thread->context);
SendReply("OK");
}
@@ -806,6 +898,10 @@ static void ReadMemory() {
SendReply("E01");
}
if (addr < Memory::PROCESS_IMAGE_VADDR || addr >= Memory::MAP_REGION_VADDR_END) {
return SendReply("E00");
}
if (!Memory::IsValidVirtualAddress(addr)) {
return SendReply("E00");
}
@@ -840,16 +936,18 @@ static void WriteMemory() {
}
void Break(bool is_memory_break) {
if (!halt_loop) {
halt_loop = true;
send_trap = true;
}
send_trap = true;
memory_break = is_memory_break;
}
/// Tell the CPU that it should perform a single step.
static void Step() {
if (command_length > 1) {
RegWrite(PC_REGISTER, GdbHexToLong(command_buffer + 1), current_thread);
// Update Unicorn context skipping scheduler, no running threads at this point
Core::System::GetInstance().ArmInterface(current_core).LoadContext(current_thread->context);
}
step_loop = true;
halt_loop = true;
send_trap = true;
@@ -1090,6 +1188,8 @@ static void Init(u16 port) {
breakpoints_read.clear();
breakpoints_write.clear();
modules.clear();
// Start gdb server
LOG_INFO(Debug_GDBStub, "Starting GDB server on port {}...", port);
@@ -1192,8 +1292,12 @@ void SetCpuStepFlag(bool is_step) {
void SendTrap(Kernel::Thread* thread, int trap) {
if (send_trap) {
if (!halt_loop || current_thread == thread) {
current_thread = thread;
SendSignal(thread, trap);
}
halt_loop = true;
send_trap = false;
SendSignal(thread, trap);
}
}
}; // namespace GDBStub
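A short sketch of how the loaders feed the new module list (adapted from the loader diffs below): once a module is mapped, its name and address range are handed to the stub, which in turn advertises qXfer:libraries:read+ to the client so the module list shows up in the debugger.

    // Deconstructed ROM / NCA loaders pass add_elf_ext = false to keep the raw module name;
    // NRO/NSO codesets use the default, which replaces the extension with ".elf" so a
    // symbol-bearing ELF of the same name can be picked up by the client.
    GDBStub::RegisterModule(module, load_addr, next_load_addr - 1, false);
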

View File

@@ -6,6 +6,7 @@
#pragma once
#include <string>
#include "common/common_types.h"
#include "core/hle/kernel/thread.h"
@@ -51,6 +52,9 @@ bool IsServerEnabled();
/// Returns true if there is an active socket connection.
bool IsConnected();
/// Register module.
void RegisterModule(std::string name, PAddr beg, PAddr end, bool add_elf_ext = true);
/**
* Signal to the gdbstub server that it should halt CPU execution.
*
@@ -80,10 +84,10 @@ BreakpointAddress GetNextBreakpointFromAddress(PAddr addr, GDBStub::BreakpointTy
*/
bool CheckBreakpoint(PAddr addr, GDBStub::BreakpointType type);
// If set to true, the CPU will halt at the beginning of the next CPU loop.
/// If set to true, the CPU will halt at the beginning of the next CPU loop.
bool GetCpuHaltFlag();
// If set to true and the CPU is halted, the CPU will step one instruction.
/// If set to true and the CPU is halted, the CPU will step one instruction.
bool GetCpuStepFlag();
/**

View File

@@ -9,6 +9,7 @@
#include "common/logging/log.h"
#include "common/string_util.h"
#include "core/file_sys/romfs_factory.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/service/filesystem/filesystem.h"
@@ -133,6 +134,8 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(
next_load_addr = AppLoader_NSO::LoadModule(path, load_addr);
if (next_load_addr) {
LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr);
// Register module with GDBStub
GDBStub::RegisterModule(module, load_addr, next_load_addr - 1, false);
} else {
next_load_addr = load_addr;
}

View File

@@ -7,10 +7,12 @@
#include "common/common_funcs.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/string_util.h"
#include "common/swap.h"
#include "core/core.h"
#include "core/file_sys/program_metadata.h"
#include "core/file_sys/romfs_factory.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/service/filesystem/filesystem.h"
@@ -259,6 +261,8 @@ ResultStatus AppLoader_NCA::Load(Kernel::SharedPtr<Kernel::Process>& process) {
next_load_addr = AppLoader_NSO::LoadModule(module, nca->GetExeFsFile(module), load_addr);
if (next_load_addr) {
LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr);
// Register module with GDBStub
GDBStub::RegisterModule(module, load_addr, next_load_addr - 1, false);
} else {
next_load_addr = load_addr;
}

View File

@@ -9,6 +9,7 @@
#include "common/logging/log.h"
#include "common/swap.h"
#include "core/core.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/loader/nro.h"
@@ -115,6 +116,9 @@ bool AppLoader_NRO::LoadNro(const std::string& path, VAddr load_base) {
codeset->memory = std::make_shared<std::vector<u8>>(std::move(program_image));
Core::CurrentProcess()->LoadModule(codeset, load_base);
// Register module with GDBStub
GDBStub::RegisterModule(codeset->name, load_base, load_base);
return true;
}

View File

@@ -10,6 +10,7 @@
#include "common/logging/log.h"
#include "common/swap.h"
#include "core/core.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/loader/nso.h"
@@ -147,6 +148,9 @@ VAddr AppLoader_NSO::LoadModule(const std::string& name, const std::vector<u8>&
codeset->memory = std::make_shared<std::vector<u8>>(std::move(program_image));
Core::CurrentProcess()->LoadModule(codeset, load_base);
// Register module with GDBStub
GDBStub::RegisterModule(codeset->name, load_base, load_base);
return load_base + image_size;
}

View File

@@ -398,27 +398,6 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
return regs.reg_array[method];
}
bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const {
// The Vertex stage is always enabled.
if (stage == Regs::ShaderStage::Vertex)
return true;
switch (stage) {
case Regs::ShaderStage::TesselationControl:
return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::TesselationControl)]
.enable != 0;
case Regs::ShaderStage::TesselationEval:
return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::TesselationEval)]
.enable != 0;
case Regs::ShaderStage::Geometry:
return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::Geometry)].enable != 0;
case Regs::ShaderStage::Fragment:
return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::Fragment)].enable != 0;
}
UNREACHABLE();
}
void Maxwell3D::ProcessClearBuffers() {
ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
regs.clear_buffers.R == regs.clear_buffers.B &&

View File

@@ -379,6 +379,14 @@ public:
}
};
bool IsShaderConfigEnabled(size_t index) const {
// The VertexB is always enabled.
if (index == static_cast<size_t>(Regs::ShaderProgram::VertexB)) {
return true;
}
return shader_config[index].enable != 0;
}
union {
struct {
INSERT_PADDING_WORDS(0x45);
@@ -780,9 +788,6 @@ public:
/// Returns the texture information for a specific texture in a specific shader stage.
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const;
/// Returns whether the specified shader stage is enabled or not.
bool IsShaderStageEnabled(Regs::ShaderStage stage) const;
private:
std::unordered_map<u32, std::vector<u32>> uploaded_macros;

View File

@@ -142,6 +142,7 @@ enum class PredCondition : u64 {
GreaterThan = 4,
NotEqual = 5,
GreaterEqual = 6,
LessThanWithNan = 9,
NotEqualWithNan = 13,
// TODO(Subv): Other condition types
};
@@ -201,6 +202,11 @@ enum class IMinMaxExchange : u64 {
XHi = 3,
};
enum class FlowCondition : u64 {
Always = 0xF,
Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
};
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -315,6 +321,10 @@ union Instruction {
}
} bfe;
union {
BitField<0, 5, FlowCondition> cond;
} flow;
union {
BitField<48, 1, u64> negate_b;
BitField<49, 1, u64> negate_c;

View File

@@ -181,6 +181,19 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
return {array_ptr, buffer_offset};
}
static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program) {
auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
// Fetch program code from memory
GLShader::ProgramCode program_code;
auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)};
Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64));
return program_code;
}
void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
// Helper function for uploading uniform data
const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
@@ -193,26 +206,23 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
};
auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = uniform_buffers.size();
u32 current_texture_bindpoint = 0;
for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
auto& shader_config = gpu.regs.shader_config[index];
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
const auto& stage = index - 1; // Stage indices are 0 - 5
const bool is_enabled = gpu.IsShaderStageEnabled(static_cast<Maxwell::ShaderStage>(stage));
// Skip stages that are not enabled
if (!is_enabled) {
if (!gpu.regs.IsShaderConfigEnabled(index)) {
continue;
}
const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu.state.shader_stages[stage]);
std::memcpy(buffer_ptr, &ubo, sizeof(ubo));
@@ -228,16 +238,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
buffer_ptr += sizeof(GLShader::MaxwellUniformData);
buffer_offset += sizeof(GLShader::MaxwellUniformData);
// Fetch program code from memory
GLShader::ProgramCode program_code;
const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)};
Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64));
GLShader::ShaderSetup setup{std::move(program_code)};
GLShader::ShaderSetup setup{GetShaderProgramCode(program)};
GLShader::ShaderEntries shader_resources;
switch (program) {
case Maxwell::ShaderProgram::VertexA: {
// VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
// Conventional HW does not support this, so we combine VertexA and VertexB into one
// stage here.
setup.SetProgramB(GetShaderProgramCode(Maxwell::ShaderProgram::VertexB));
GLShader::MaxwellVSConfig vs_config{setup};
shader_resources =
shader_program_manager->UseProgrammableVertexShader(vs_config, setup);
break;
}
case Maxwell::ShaderProgram::VertexB: {
GLShader::MaxwellVSConfig vs_config{setup};
shader_resources =
@@ -268,6 +283,12 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
current_texture_bindpoint =
SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
current_texture_bindpoint, shader_resources.texture_samplers);
// When VertexA is enabled, we have dual vertex shaders
if (program == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
index++;
}
}
shader_program_manager->UseTrivialGeometryShader();
@@ -301,9 +322,6 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c
bool using_depth_fb) {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
// Sync the depth test state before configuring the framebuffer surfaces.
SyncDepthTestState();
// TODO(bunnei): Implement this
const bool has_stencil = false;
@@ -368,6 +386,13 @@ void RasterizerOpenGL::Clear() {
if (regs.clear_buffers.Z) {
clear_mask |= GL_DEPTH_BUFFER_BIT;
use_depth_fb = true;
// Always enable the depth write when clearing the depth buffer. The depth write mask is
// ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
state.depth.test_enabled = true;
state.depth.write_mask = GL_TRUE;
state.depth.test_func = GL_ALWAYS;
state.Apply();
}
if (clear_mask == 0)
@@ -402,6 +427,7 @@ void RasterizerOpenGL::DrawArrays() {
auto [dirty_color_surface, dirty_depth_surface] =
ConfigureFramebuffers(true, regs.zeta.Address() != 0);
SyncDepthTestState();
SyncBlendState();
SyncCullMode();
@@ -605,9 +631,6 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
auto& gpu = Core::System::GetInstance().GPU();
auto& maxwell3d = gpu.Get3DEngine();
ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage),
"Attempted to upload constbuffer of disabled shader stage");
// Reset all buffer draw state for this stage.
for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) {
buffer.bindpoint = 0;
@@ -674,9 +697,6 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
auto& gpu = Core::System::GetInstance().GPU();
auto& maxwell3d = gpu.Get3DEngine();
ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage),
"Attempted to upload textures of disabled shader stage");
ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
"Exceeded the number of active textures.");

View File

@@ -42,13 +42,14 @@ enum class ExitMethod {
struct Subroutine {
/// Generates a name suitable for GLSL source code.
std::string GetName() const {
return "sub_" + std::to_string(begin) + '_' + std::to_string(end);
return "sub_" + std::to_string(begin) + '_' + std::to_string(end) + '_' + suffix;
}
u32 begin; ///< Entry point of the subroutine.
u32 end; ///< Return point of the subroutine.
ExitMethod exit_method; ///< Exit method of the subroutine.
std::set<u32> labels; ///< Addresses referenced by JMP instructions.
u32 begin; ///< Entry point of the subroutine.
u32 end; ///< Return point of the subroutine.
const std::string& suffix; ///< Suffix of the shader, used to make a unique subroutine name
ExitMethod exit_method; ///< Exit method of the subroutine.
std::set<u32> labels; ///< Addresses referenced by JMP instructions.
bool operator<(const Subroutine& rhs) const {
return std::tie(begin, end) < std::tie(rhs.begin, rhs.end);
@@ -58,11 +59,11 @@ struct Subroutine {
/// Analyzes shader code and produces a set of subroutines.
class ControlFlowAnalyzer {
public:
ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset)
ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix)
: program_code(program_code) {
// Recursively finds all subroutines.
const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END);
const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix);
if (program_main.exit_method != ExitMethod::AlwaysEnd)
throw DecompileFail("Program does not always end");
}
@@ -77,12 +78,12 @@ private:
std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
/// Adds and analyzes a new subroutine if it is not added yet.
const Subroutine& AddSubroutine(u32 begin, u32 end) {
auto iter = subroutines.find(Subroutine{begin, end});
const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) {
auto iter = subroutines.find(Subroutine{begin, end, suffix});
if (iter != subroutines.end())
return *iter;
Subroutine subroutine{begin, end};
Subroutine subroutine{begin, end, suffix};
subroutine.exit_method = Scan(begin, end, subroutine.labels);
if (subroutine.exit_method == ExitMethod::Undetermined)
throw DecompileFail("Recursive function detected");
@@ -191,7 +192,8 @@ public:
UnsignedInteger,
};
GLSLRegister(size_t index, ShaderWriter& shader) : index{index}, shader{shader} {}
GLSLRegister(size_t index, ShaderWriter& shader, const std::string& suffix)
: index{index}, shader{shader}, suffix{suffix} {}
/// Gets the GLSL type string for a register
static std::string GetTypeString(Type type) {
@@ -216,7 +218,7 @@ public:
/// Returns a GLSL string representing the current state of the register
const std::string GetActiveString() {
declr_type.insert(active_type);
return GetPrefixString(active_type) + std::to_string(index);
return GetPrefixString(active_type) + std::to_string(index) + '_' + suffix;
}
/// Returns true if the active type is a float
@@ -251,6 +253,7 @@ private:
ShaderWriter& shader;
Type active_type{Type::Float};
std::set<Type> declr_type;
const std::string& suffix;
};
/**
@@ -262,8 +265,8 @@ private:
class GLSLRegisterManager {
public:
GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations,
const Maxwell3D::Regs::ShaderStage& stage)
: shader{shader}, declarations{declarations}, stage{stage} {
const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix)
: shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix} {
BuildRegisterList();
}
@@ -430,12 +433,12 @@ public:
}
/// Add declarations for registers
void GenerateDeclarations() {
void GenerateDeclarations(const std::string& suffix) {
for (const auto& reg : regs) {
for (const auto& type : reg.DeclaredTypes()) {
declarations.AddLine(GLSLRegister::GetTypeString(type) + ' ' +
GLSLRegister::GetPrefixString(type) +
std::to_string(reg.GetIndex()) + " = 0;");
reg.GetPrefixString(type) + std::to_string(reg.GetIndex()) +
'_' + suffix + " = 0;");
}
}
declarations.AddNewLine();
@@ -558,7 +561,7 @@ private:
/// Build the GLSL register list.
void BuildRegisterList() {
for (size_t index = 0; index < Register::NumRegisters; ++index) {
regs.emplace_back(index, shader);
regs.emplace_back(index, shader, suffix);
}
}
@@ -620,16 +623,17 @@ private:
std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
std::vector<SamplerEntry> used_samplers;
const Maxwell3D::Regs::ShaderStage& stage;
const std::string& suffix;
};
class GLSLGenerator {
public:
GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code,
u32 main_offset, Maxwell3D::Regs::ShaderStage stage)
u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
: subroutines(subroutines), program_code(program_code), main_offset(main_offset),
stage(stage) {
stage(stage), suffix(suffix) {
Generate();
Generate(suffix);
}
std::string GetShaderCode() {
@@ -644,7 +648,7 @@ public:
private:
/// Gets the Subroutine object corresponding to the specified address.
const Subroutine& GetSubroutine(u32 begin, u32 end) const {
auto iter = subroutines.find(Subroutine{begin, end});
auto iter = subroutines.find(Subroutine{begin, end, suffix});
ASSERT(iter != subroutines.end());
return *iter;
}
@@ -689,7 +693,7 @@ private:
// Can't assign to the constant predicate.
ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
std::string variable = 'p' + std::to_string(pred);
std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
shader.AddLine(variable + " = " + value + ';');
declr_predicates.insert(std::move(variable));
}
@@ -707,7 +711,7 @@ private:
if (index == static_cast<u64>(Pred::UnusedIndex))
variable = "true";
else
variable = 'p' + std::to_string(index);
variable = 'p' + std::to_string(index) + '_' + suffix;
if (negate) {
return "!(" + variable + ')';
@@ -728,10 +732,10 @@ private:
const std::string& op_a, const std::string& op_b) const {
using Tegra::Shader::PredCondition;
static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
{PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
{PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
{PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
{PredCondition::NotEqualWithNan, "!="},
{PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
{PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
{PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
{PredCondition::LessThanWithNan, "<"}, {PredCondition::NotEqualWithNan, "!="},
};
const auto& comparison{PredicateComparisonStrings.find(condition)};
@@ -739,7 +743,8 @@ private:
"Unknown predicate comparison operation");
std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
if (condition == PredCondition::NotEqualWithNan) {
if (condition == PredCondition::LessThanWithNan ||
condition == PredCondition::NotEqualWithNan) {
predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
}
@@ -1639,16 +1644,32 @@ private:
shader.AddLine("color.a = " + regs.GetRegisterAsFloat(3) + ';');
}
shader.AddLine("return true;");
if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
// If this is an unconditional exit then just end processing here, otherwise
// we have to account for the possibility of the condition not being met, so
// continue processing the next instruction.
offset = PROGRAM_END - 1;
switch (instr.flow.cond) {
case Tegra::Shader::FlowCondition::Always:
shader.AddLine("return true;");
if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
// If this is an unconditional exit then just end processing here,
// otherwise we have to account for the possibility of the condition
// not being met, so continue processing the next instruction.
offset = PROGRAM_END - 1;
}
break;
case Tegra::Shader::FlowCondition::Fcsm_Tr:
// TODO(bunnei): What is this used for? If we assume this condition is not
// satisfied, dual vertex shaders in Farming Simulator make more sense
LOG_CRITICAL(HW_GPU, "Skipping unknown FlowCondition::Fcsm_Tr");
break;
default:
LOG_CRITICAL(HW_GPU, "Unhandled flow condition: {}",
static_cast<u32>(instr.flow.cond.Value()));
UNREACHABLE();
}
break;
}
case OpCode::Id::KIL: {
ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
shader.AddLine("discard;");
break;
}
@@ -1669,8 +1690,9 @@ private:
// can ignore this when generating GLSL code.
break;
}
case OpCode::Id::DEPBAR:
case OpCode::Id::SYNC: {
case OpCode::Id::SYNC:
ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
case OpCode::Id::DEPBAR: {
// TODO(Subv): Find out if we actually have to care about these instructions or if
// the GLSL compiler takes care of that for us.
LOG_WARNING(HW_GPU, "DEPBAR/SYNC instruction is stubbed");
@@ -1710,7 +1732,7 @@ private:
return program_counter;
}
void Generate() {
void Generate(const std::string& suffix) {
// Add declarations for all subroutines
for (const auto& subroutine : subroutines) {
shader.AddLine("bool " + subroutine.GetName() + "();");
@@ -1718,7 +1740,7 @@ private:
shader.AddNewLine();
// Add the main entry point
shader.AddLine("bool exec_shader() {");
shader.AddLine("bool exec_" + suffix + "() {");
++shader.scope;
CallSubroutine(GetSubroutine(main_offset, PROGRAM_END));
--shader.scope;
@@ -1781,7 +1803,7 @@ private:
/// Add declarations for registers
void GenerateDeclarations() {
regs.GenerateDeclarations();
regs.GenerateDeclarations(suffix);
for (const auto& pred : declr_predicates) {
declarations.AddLine("bool " + pred + " = false;");
@@ -1794,27 +1816,30 @@ private:
const ProgramCode& program_code;
const u32 main_offset;
Maxwell3D::Regs::ShaderStage stage;
const std::string& suffix;
ShaderWriter shader;
ShaderWriter declarations;
GLSLRegisterManager regs{shader, declarations, stage};
GLSLRegisterManager regs{shader, declarations, stage, suffix};
// Declarations
std::set<std::string> declr_predicates;
}; // namespace Decompiler
std::string GetCommonDeclarations() {
std::string declarations = "bool exec_shader();\n";
std::string declarations;
declarations += "#define MAX_CONSTBUFFER_ELEMENTS " +
std::to_string(RasterizerOpenGL::MaxConstbufferSize / (sizeof(GLvec4)));
declarations += '\n';
return declarations;
}
boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
Maxwell3D::Regs::ShaderStage stage) {
Maxwell3D::Regs::ShaderStage stage,
const std::string& suffix) {
try {
auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines();
GLSLGenerator generator(subroutines, program_code, main_offset, stage);
auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix);
return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
} catch (const DecompileFail& exception) {
LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());

View File

@@ -20,7 +20,8 @@ using Tegra::Engines::Maxwell3D;
std::string GetCommonDeclarations();
boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
Maxwell3D::Regs::ShaderStage stage);
Maxwell3D::Regs::ShaderStage stage,
const std::string& suffix);
} // namespace Decompiler
} // namespace GLShader

View File

@@ -17,10 +17,17 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConf
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += Decompiler::GetCommonDeclarations();
out += "bool exec_vertex();\n";
if (setup.IsDualProgram()) {
out += "bool exec_vertex_b();\n";
}
ProgramResult program =
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Vertex, "vertex")
.get_value_or({});
ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Vertex)
.get_value_or({});
out += R"(
out gl_PerVertex {
@@ -34,7 +41,14 @@ layout (std140) uniform vs_config {
};
void main() {
exec_shader();
exec_vertex();
)";
if (setup.IsDualProgram()) {
out += " exec_vertex_b();";
}
out += R"(
// Viewport can be flipped, which is unsupported by glViewport
position.xy *= viewport_flip.xy;
@@ -44,8 +58,19 @@ void main() {
// For now, this is here to bring order in lieu of proper emulation
position.w = 1.0;
}
)";
out += program.first;
if (setup.IsDualProgram()) {
ProgramResult program_b =
Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
.get_value_or({});
out += program_b.first;
}
return {out, program.second};
}
@@ -53,12 +78,13 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += Decompiler::GetCommonDeclarations();
out += "bool exec_fragment();\n";
ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Fragment)
.get_value_or({});
ProgramResult program =
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Fragment, "fragment")
.get_value_or({});
out += R"(
in vec4 position;
out vec4 color;
@@ -67,7 +93,7 @@ layout (std140) uniform fs_config {
};
void main() {
exec_shader();
exec_fragment();
}
)";

View File

@@ -115,21 +115,48 @@ struct ShaderEntries {
using ProgramResult = std::pair<std::string, ShaderEntries>;
struct ShaderSetup {
ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {}
ShaderSetup(const ProgramCode& program_code) {
program.code = program_code;
}
struct {
ProgramCode code;
ProgramCode code_b; // Used for dual vertex shaders
} program;
ProgramCode program_code;
bool program_code_hash_dirty = true;
u64 GetProgramCodeHash() {
if (program_code_hash_dirty) {
program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
program_code_hash = GetNewHash();
program_code_hash_dirty = false;
}
return program_code_hash;
}
/// Used in scenarios where we have dual vertex shaders
void SetProgramB(const ProgramCode& program_b) {
program.code_b = program_b;
has_program_b = true;
}
bool IsDualProgram() const {
return has_program_b;
}
private:
u64 GetNewHash() const {
if (has_program_b) {
// Compute hash over dual shader programs
return Common::ComputeHash64(&program, sizeof(program));
} else {
// Compute hash over a single shader program
return Common::ComputeHash64(&program.code, program.code.size());
}
}
u64 program_code_hash{};
bool has_program_b{};
};
struct MaxwellShaderConfigCommon {