Compare commits

..

22 Commits

Author SHA1 Message Date
lat9nq
74e7f07bef shader_recompiler: Implement LowerInt16ToInt32
AMD drivers 22.3.2 and later expose a bug in yuzu, where the application
would submit 16-bit integer instructions to GPUs that don't support
16-bit integers, namely GCN 4 devices.

Replace any 16-bit instructions with 32-bit ones so newer AMD drivers
will work with VK_KHR_workgroup_memory_explicit_layout.
2022-05-19 16:01:18 -04:00
Mai M
b57df1dcb9 Merge pull request #8351 from abouvier/patch-2
video_core: Support new VkResult
2022-05-17 14:10:00 -04:00
Alexandre Bouvier
020982508d video_core: Support new VkResult 2022-05-17 17:37:10 +02:00
Mai M
5808e76fae Merge pull request #8336 from abouvier/unspirv
sirit: Allow using system spirv-headers
2022-05-15 09:24:05 -04:00
Mai M
5c20373db3 Merge pull request #8337 from lioncash/fmt
general: Avoid ambiguous format_to compilation errors
2022-05-14 18:51:23 -04:00
Lioncash
f981e90af3 general: Avoid ambiguous format_to compilation errors
Ensures that we're using the fmt version of format_to.

These are also the only three outliers. All of the other formatters we
have are properly qualified.
2022-05-14 16:48:34 -04:00
Alexandre Bouvier
55b0dda57c sirit: Allow using system spirv-headers 2022-05-14 22:03:23 +02:00
Morph
0b9ef3c0b8 Merge pull request #8308 from german77/disablesix
service: hid: Disable correctly motion input
2022-05-11 17:51:54 -04:00
Morph
2fb6df3fe9 Merge pull request #8314 from liamwhite/gl-flip-2
OpenGL: interpret face flips according to GL NDC
2022-05-11 17:51:18 -04:00
Morph
dca63391b6 Merge pull request #8313 from liamwhite/dma-bpp
maxwell_dma: fix bytes_per_pixel
2022-05-11 17:51:02 -04:00
Morph
c2b583c911 Merge pull request #8328 from liamwhite/macro-clear
video_core/macro: clear code on upload address assignment
2022-05-11 17:49:25 -04:00
Liam
e7ba9fd7e1 maxwell_dma: use fallback if remapping is enabled 2022-05-10 19:26:48 -04:00
Liam
e158951695 video_core/macro: clear code on upload address assignment 2022-05-10 17:07:21 -04:00
Mai M
f345ffdc0f Merge pull request #8325 from zhaobot/tx-update-20220509164742
Update translations (2022-05-09)
2022-05-09 18:10:34 -04:00
The yuzu Community
26e5bc6082 Update translations (2022-05-09) 2022-05-09 16:48:01 +00:00
Mai M
2123594ce2 Merge pull request #8320 from liamwhite/macro-dump
video_core/macro: Add option to dump macros
2022-05-08 22:56:01 -04:00
Fernando Sahmkow
5562ae9cc5 VideoCore: Add option to dump the macros.
Co-Authored-By: liamwhite <liamwhite@users.noreply.github.com>
2022-05-08 21:37:34 -04:00
Mai M
4087f1d10f Merge pull request #8319 from liamwhite/macro-warn
video_core/macro_jit_x64: warn on invalid parameter access
2022-05-08 04:58:33 -04:00
Liam
7fe5004f90 video_core/macro_jit_x64: warn on invalid parameter access 2022-05-08 02:48:03 -04:00
Liam
4f1a2c2562 maxwell_dma: fix bytes per pixel 2022-05-06 18:18:00 -04:00
Narr the Reg
babd580c64 service: hid: Fix motion refresh rate 2022-05-06 11:13:49 -05:00
german77
7eb0992d2c service: hid: Disable correctly motion input 2022-05-06 11:13:48 -05:00
45 changed files with 16249 additions and 13050 deletions

1223
dist/languages/ca.ts vendored

File diff suppressed because it is too large Load Diff

1233
dist/languages/cs.ts vendored

File diff suppressed because it is too large Load Diff

1223
dist/languages/da.ts vendored

File diff suppressed because it is too large Load Diff

1237
dist/languages/de.ts vendored

File diff suppressed because it is too large Load Diff

1335
dist/languages/el.ts vendored

File diff suppressed because it is too large Load Diff

1223
dist/languages/es.ts vendored

File diff suppressed because it is too large Load Diff

1228
dist/languages/fr.ts vendored

File diff suppressed because it is too large Load Diff

1254
dist/languages/id.ts vendored

File diff suppressed because it is too large Load Diff

1233
dist/languages/it.ts vendored

File diff suppressed because it is too large Load Diff

1229
dist/languages/ja_JP.ts vendored

File diff suppressed because it is too large Load Diff

1290
dist/languages/ko_KR.ts vendored

File diff suppressed because it is too large Load Diff

1235
dist/languages/nb.ts vendored

File diff suppressed because it is too large Load Diff

1231
dist/languages/nl.ts vendored

File diff suppressed because it is too large Load Diff

1229
dist/languages/pl.ts vendored

File diff suppressed because it is too large Load Diff

1213
dist/languages/pt_BR.ts vendored

File diff suppressed because it is too large Load Diff

1221
dist/languages/pt_PT.ts vendored

File diff suppressed because it is too large Load Diff

1825
dist/languages/ru_RU.ts vendored

File diff suppressed because it is too large Load Diff

1229
dist/languages/sv.ts vendored

File diff suppressed because it is too large Load Diff

1227
dist/languages/tr_TR.ts vendored

File diff suppressed because it is too large Load Diff

1229
dist/languages/vi.ts vendored

File diff suppressed because it is too large Load Diff

1229
dist/languages/vi_VN.ts vendored

File diff suppressed because it is too large Load Diff

1225
dist/languages/zh_CN.ts vendored

File diff suppressed because it is too large Load Diff

1221
dist/languages/zh_TW.ts vendored

File diff suppressed because it is too large Load Diff

View File

@@ -606,6 +606,7 @@ struct Values {
BasicSetting<bool> dump_exefs{false, "dump_exefs"};
BasicSetting<bool> dump_nso{false, "dump_nso"};
BasicSetting<bool> dump_shaders{false, "dump_shaders"};
BasicSetting<bool> dump_macros{false, "dump_macros"};
BasicSetting<bool> enable_fs_access_log{false, "enable_fs_access_log"};
BasicSetting<bool> reporting_services{false, "reporting_services"};
BasicSetting<bool> quest_flag{false, "quest_flag"};

View File

@@ -20,7 +20,7 @@ struct fmt::formatter<Dynarmic::A32::CoprocReg> {
}
template <typename FormatContext>
auto format(const Dynarmic::A32::CoprocReg& reg, FormatContext& ctx) {
return format_to(ctx.out(), "cp{}", static_cast<size_t>(reg));
return fmt::format_to(ctx.out(), "cp{}", static_cast<size_t>(reg));
}
};

View File

@@ -3,7 +3,9 @@
#include <algorithm>
#include <array>
#include <chrono>
#include <cstring>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
@@ -529,6 +531,14 @@ void Controller_NPad::OnMotionUpdate(const Core::Timing::CoreTiming& core_timing
auto& sixaxis_left_lifo_state = controller.sixaxis_left_lifo_state;
auto& sixaxis_right_lifo_state = controller.sixaxis_right_lifo_state;
// Clear previous state
sixaxis_fullkey_state = {};
sixaxis_handheld_state = {};
sixaxis_dual_left_state = {};
sixaxis_dual_right_state = {};
sixaxis_left_lifo_state = {};
sixaxis_right_lifo_state = {};
if (controller.sixaxis_sensor_enabled && Settings::values.motion_enabled.GetValue()) {
controller.sixaxis_at_rest = true;
for (std::size_t e = 0; e < motion_state.size(); ++e) {
@@ -537,69 +547,55 @@ void Controller_NPad::OnMotionUpdate(const Core::Timing::CoreTiming& core_timing
}
}
// Fills `state` for one sixaxis sensor: live data from `hid_state` when motion is usable,
// otherwise a fixed default state.
const auto set_motion_state = [&](SixAxisSensorState& state,
                                  const Core::HID::ControllerMotion& hid_state) {
    using namespace std::literals::chrono_literals;
    // State reported while the sensor or the global motion setting is turned off.
    static constexpr SixAxisSensorState default_motion_state = {
        .delta_time = std::chrono::nanoseconds(5ms).count(),
        .accel = {0, 0, -1.0f},
        .orientation =
            {
                Common::Vec3f{1.0f, 0, 0},
                Common::Vec3f{0, 1.0f, 0},
                Common::Vec3f{0, 0, 1.0f},
            },
        .attribute = {1},
    };

    // The setting is only queried when the sensor itself is enabled.
    const bool motion_available =
        controller.sixaxis_sensor_enabled && Settings::values.motion_enabled.GetValue();
    if (!motion_available) {
        state = default_motion_state;
        return;
    }

    state.attribute.is_connected.Assign(1);
    state.delta_time = std::chrono::nanoseconds(5ms).count();
    state.accel = hid_state.accel;
    state.gyro = hid_state.gyro;
    state.rotation = hid_state.rotation;
    state.orientation = hid_state.orientation;
};
switch (controller_type) {
case Core::HID::NpadStyleIndex::None:
UNREACHABLE();
break;
case Core::HID::NpadStyleIndex::ProController:
sixaxis_fullkey_state.attribute.raw = 0;
if (controller.sixaxis_sensor_enabled) {
sixaxis_fullkey_state.attribute.is_connected.Assign(1);
sixaxis_fullkey_state.accel = motion_state[0].accel;
sixaxis_fullkey_state.gyro = motion_state[0].gyro;
sixaxis_fullkey_state.rotation = motion_state[0].rotation;
sixaxis_fullkey_state.orientation = motion_state[0].orientation;
}
set_motion_state(sixaxis_fullkey_state, motion_state[0]);
break;
case Core::HID::NpadStyleIndex::Handheld:
sixaxis_handheld_state.attribute.raw = 0;
if (controller.sixaxis_sensor_enabled) {
sixaxis_handheld_state.attribute.is_connected.Assign(1);
sixaxis_handheld_state.accel = motion_state[0].accel;
sixaxis_handheld_state.gyro = motion_state[0].gyro;
sixaxis_handheld_state.rotation = motion_state[0].rotation;
sixaxis_handheld_state.orientation = motion_state[0].orientation;
}
set_motion_state(sixaxis_handheld_state, motion_state[0]);
break;
case Core::HID::NpadStyleIndex::JoyconDual:
sixaxis_dual_left_state.attribute.raw = 0;
sixaxis_dual_right_state.attribute.raw = 0;
if (controller.sixaxis_sensor_enabled) {
// Set motion for the left joycon
sixaxis_dual_left_state.attribute.is_connected.Assign(1);
sixaxis_dual_left_state.accel = motion_state[0].accel;
sixaxis_dual_left_state.gyro = motion_state[0].gyro;
sixaxis_dual_left_state.rotation = motion_state[0].rotation;
sixaxis_dual_left_state.orientation = motion_state[0].orientation;
}
if (controller.sixaxis_sensor_enabled) {
// Set motion for the right joycon
sixaxis_dual_right_state.attribute.is_connected.Assign(1);
sixaxis_dual_right_state.accel = motion_state[1].accel;
sixaxis_dual_right_state.gyro = motion_state[1].gyro;
sixaxis_dual_right_state.rotation = motion_state[1].rotation;
sixaxis_dual_right_state.orientation = motion_state[1].orientation;
}
set_motion_state(sixaxis_dual_left_state, motion_state[0]);
set_motion_state(sixaxis_dual_right_state, motion_state[1]);
break;
case Core::HID::NpadStyleIndex::JoyconLeft:
sixaxis_left_lifo_state.attribute.raw = 0;
if (controller.sixaxis_sensor_enabled) {
sixaxis_left_lifo_state.attribute.is_connected.Assign(1);
sixaxis_left_lifo_state.accel = motion_state[0].accel;
sixaxis_left_lifo_state.gyro = motion_state[0].gyro;
sixaxis_left_lifo_state.rotation = motion_state[0].rotation;
sixaxis_left_lifo_state.orientation = motion_state[0].orientation;
}
set_motion_state(sixaxis_left_lifo_state, motion_state[0]);
break;
case Core::HID::NpadStyleIndex::JoyconRight:
sixaxis_right_lifo_state.attribute.raw = 0;
if (controller.sixaxis_sensor_enabled) {
sixaxis_right_lifo_state.attribute.is_connected.Assign(1);
sixaxis_right_lifo_state.accel = motion_state[1].accel;
sixaxis_right_lifo_state.gyro = motion_state[1].gyro;
sixaxis_right_lifo_state.rotation = motion_state[1].rotation;
sixaxis_right_lifo_state.orientation = motion_state[1].orientation;
}
set_motion_state(sixaxis_right_lifo_state, motion_state[1]);
break;
default:
break;

View File

@@ -37,8 +37,7 @@ namespace Service::HID {
// Period time is obtained by measuring the number of samples in a second on HW using a homebrew
constexpr auto pad_update_ns = std::chrono::nanoseconds{4 * 1000 * 1000}; // (4ms, 250Hz)
constexpr auto mouse_keyboard_update_ns = std::chrono::nanoseconds{8 * 1000 * 1000}; // (8ms, 125Hz)
// TODO: Correct update rate for motion is 5ms. Check why some games don't behave at that speed
constexpr auto motion_update_ns = std::chrono::nanoseconds{10 * 1000 * 1000}; // (10ms, 100Hz)
constexpr auto motion_update_ns = std::chrono::nanoseconds{5 * 1000 * 1000}; // (5ms, 200Hz)
IAppletResource::IAppletResource(Core::System& system_,
KernelHelpers::ServiceContext& service_context_)

View File

@@ -219,6 +219,7 @@ add_library(shader_recompiler STATIC
ir_opt/global_memory_to_storage_buffer_pass.cpp
ir_opt/identity_removal_pass.cpp
ir_opt/lower_fp16_to_fp32.cpp
ir_opt/lower_int16_to_int32.cpp
ir_opt/lower_int64_to_int32.cpp
ir_opt/passes.h
ir_opt/rescaling_pass.cpp

View File

@@ -103,6 +103,6 @@ struct fmt::formatter<Shader::IR::Opcode> {
}
template <typename FormatContext>
auto format(const Shader::IR::Opcode& op, FormatContext& ctx) {
return format_to(ctx.out(), "{}", Shader::IR::NameOf(op));
return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(op));
}
};

View File

@@ -24,6 +24,6 @@ struct fmt::formatter<Shader::Maxwell::Opcode> {
}
template <typename FormatContext>
auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) {
return format_to(ctx.out(), "{}", NameOf(opcode));
return fmt::format_to(ctx.out(), "{}", NameOf(opcode));
}
};

View File

@@ -209,6 +209,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
if (!host_info.support_int64) {
Optimization::LowerInt64ToInt32(program);
}
if (!host_info.support_int16) {
Optimization::LowerInt16ToInt32(program);
}
Optimization::SsaRewritePass(program);
Optimization::ConstantPropagationPass(program);

View File

@@ -11,6 +11,7 @@ namespace Shader {
/// Misc information about the host
///
/// Every flag is value-initialized, so a capability reads as false until the code filling in
/// this struct sets it explicitly.
struct HostTranslateInfo {
bool support_float16{}; ///< True when the device supports 16-bit floats
bool support_int16{}; ///< True when the device supports 16-bit integers
bool support_int64{}; ///< True when the device supports 64-bit integers
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
};

View File

@@ -0,0 +1,72 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
namespace {
/// Maps an opcode that works on 16-bit integers to the 32-bit opcode that replaces it.
///
/// Signed and unsigned loads/stores share a single 32-bit replacement, conversions keep the
/// signedness of the integer operand, and conversions involving F16 are mapped to their F32
/// form. Any opcode without an entry here is returned unchanged.
///
/// @param op Opcode to lower.
/// @return The 32-bit replacement, or `op` itself when no replacement is listed.
IR::Opcode Replace(IR::Opcode op) {
    switch (op) {
    // Constant buffer reads
    case IR::Opcode::GetCbufU16:
    case IR::Opcode::GetCbufS16:
        return IR::Opcode::GetCbufU32;
    case IR::Opcode::UndefU16:
        return IR::Opcode::UndefU32;
    // Global memory
    case IR::Opcode::LoadGlobalU16:
    case IR::Opcode::LoadGlobalS16:
        return IR::Opcode::LoadGlobal32;
    case IR::Opcode::WriteGlobalU16:
    case IR::Opcode::WriteGlobalS16:
        return IR::Opcode::WriteGlobal32;
    // Storage buffers
    case IR::Opcode::LoadStorageU16:
    case IR::Opcode::LoadStorageS16:
        return IR::Opcode::LoadStorage32;
    case IR::Opcode::WriteStorageU16:
    case IR::Opcode::WriteStorageS16:
        return IR::Opcode::WriteStorage32;
    // Shared memory
    case IR::Opcode::LoadSharedU16:
    case IR::Opcode::LoadSharedS16:
        return IR::Opcode::LoadSharedU32;
    case IR::Opcode::WriteSharedU16:
        return IR::Opcode::WriteSharedU32;
    case IR::Opcode::SelectU16:
        return IR::Opcode::SelectU32;
    // Bit casts
    case IR::Opcode::BitCastU16F16:
        return IR::Opcode::BitCastU32F32;
    case IR::Opcode::BitCastF16U16:
        return IR::Opcode::BitCastF32U32;
    // Float to integer conversions
    case IR::Opcode::ConvertS16F16:
    case IR::Opcode::ConvertS16F32:
        return IR::Opcode::ConvertS32F32;
    case IR::Opcode::ConvertS16F64:
        return IR::Opcode::ConvertS32F64;
    case IR::Opcode::ConvertU16F16:
    case IR::Opcode::ConvertU16F32:
        return IR::Opcode::ConvertU32F32;
    case IR::Opcode::ConvertU16F64:
        return IR::Opcode::ConvertU32F64;
    // Integer to float conversions
    case IR::Opcode::ConvertF16S16:
    case IR::Opcode::ConvertF32S16:
        return IR::Opcode::ConvertF32S32;
    case IR::Opcode::ConvertF16U16:
    case IR::Opcode::ConvertF32U16:
        return IR::Opcode::ConvertF32U32;
    // The signed source must stay signed, matching the F32 conversions above; lowering it to
    // the unsigned opcode would change the result for negative inputs.
    case IR::Opcode::ConvertF64S16:
        return IR::Opcode::ConvertF64S32;
    case IR::Opcode::ConvertF64U16:
        return IR::Opcode::ConvertF64U32;
    default:
        return op;
    }
}
} // Anonymous namespace
/// Rewrites every instruction of `program` in place, swapping each opcode for whatever
/// Replace() returns for it.
void LowerInt16ToInt32(IR::Program& program) {
    for (IR::Block* const current_block : program.blocks) {
        for (IR::Inst& instruction : current_block->Instructions()) {
            const IR::Opcode lowered{Replace(instruction.GetOpcode())};
            instruction.ReplaceOpcode(lowered);
        }
    }
}
} // namespace Shader::Optimization

View File

@@ -14,6 +14,7 @@ void DeadCodeEliminationPass(IR::Program& program);
void GlobalMemoryToStorageBufferPass(IR::Program& program);
void IdentityRemovalPass(IR::Program& program);
void LowerFp16ToFp32(IR::Program& program);
void LowerInt16ToInt32(IR::Program& program);
void LowerInt64ToInt32(IR::Program& program);
void RescalingPass(IR::Program& program);
void SsaRewritePass(IR::Program& program);

View File

@@ -173,6 +173,8 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
case MAXWELL3D_REG_INDEX(shadow_ram_control):
shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument);
return;
case MAXWELL3D_REG_INDEX(macros.upload_address):
return macro_engine->ClearCode(regs.macros.upload_address);
case MAXWELL3D_REG_INDEX(macros.data):
return macro_engine->AddCode(regs.macros.upload_address, argument);
case MAXWELL3D_REG_INDEX(macros.bind):

View File

@@ -134,7 +134,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
// Deswizzle the input and copy it over.
UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
const u32 bytes_per_pixel =
regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1;
const Parameters& src_params = regs.src_params;
const u32 width = src_params.width;
const u32 height = src_params.height;
@@ -166,7 +167,8 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
const auto& dst_params = regs.dst_params;
const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
const u32 bytes_per_pixel =
regs.launch_dma.remap_enable ? regs.pitch_in / regs.line_length_in : 1;
const u32 width = dst_params.width;
const u32 height = dst_params.height;
const u32 depth = dst_params.depth;
@@ -210,7 +212,8 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
}
void MaxwellDMA::FastCopyBlockLinearToPitch() {
const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
const u32 bytes_per_pixel =
regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1;
const size_t src_size = GOB_SIZE;
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
u32 pos_x = regs.src_params.origin.x;

View File

@@ -2,11 +2,15 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include <fstream>
#include <optional>
#include <span>
#include <boost/container_hash/hash.hpp>
#include "common/assert.h"
#include "common/fs/fs.h"
#include "common/fs/path_util.h"
#include "common/settings.h"
#include "video_core/macro/macro.h"
#include "video_core/macro/macro_hle.h"
@@ -15,6 +19,23 @@
namespace Tegra {
/// Writes the words of a macro program to "<dump dir>/macros/<hash>.macro" as raw bytes.
///
/// Both directories are created on demand. Every failure (directory creation, opening the
/// file, writing the data) is logged and otherwise ignored.
///
/// @param hash Hash of the macro code; printed as 16 hex digits to form the file name.
/// @param code Macro code words to write.
static void Dump(u64 hash, std::span<const u32> code) {
    const auto base_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)};
    const auto macro_dir{base_dir / "macros"};
    if (!Common::FS::CreateDir(base_dir) || !Common::FS::CreateDir(macro_dir)) {
        LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories");
        return;
    }
    const auto name{macro_dir / fmt::format("{:016x}.macro", hash)};

    std::fstream macro_file(name, std::ios::out | std::ios::binary);
    if (!macro_file) {
        LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}",
                  Common::FS::PathToUTF8String(name));
        return;
    }
    macro_file.write(reinterpret_cast<const char*>(code.data()),
                     static_cast<std::streamsize>(code.size_bytes()));
    // Output is buffered, so flush before inspecting the stream state; otherwise a failed
    // write could go unnoticed until the destructor closes the file.
    macro_file.flush();
    if (!macro_file) {
        LOG_ERROR(Common_Filesystem, "Failed to write macro dump to {}",
                  Common::FS::PathToUTF8String(name));
    }
}
MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
: hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
@@ -24,6 +45,11 @@ void MacroEngine::AddCode(u32 method, u32 data) {
uploaded_macro_code[method].push_back(data);
}
void MacroEngine::ClearCode(u32 method) {
    // Forget the code words uploaded for this method ...
    uploaded_macro_code.erase(method);
    // ... and the cache entry stored under the same method.
    macro_cache.erase(method);
}
void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
auto compiled_macro = macro_cache.find(method);
if (compiled_macro != macro_cache.end()) {
@@ -54,6 +80,9 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
if (!mid_method.has_value()) {
cache_info.lle_program = Compile(macro_code->second);
cache_info.hash = boost::hash_value(macro_code->second);
if (Settings::values.dump_macros) {
Dump(cache_info.hash, macro_code->second);
}
} else {
const auto& macro_cached = uploaded_macro_code[mid_method.value()];
const auto rebased_method = method - mid_method.value();
@@ -63,6 +92,9 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
code.size() * sizeof(u32));
cache_info.hash = boost::hash_value(code);
cache_info.lle_program = Compile(code);
if (Settings::values.dump_macros) {
Dump(cache_info.hash, code);
}
}
if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) {

View File

@@ -117,6 +117,9 @@ public:
// Store the uploaded macro code to compile them when they're called.
void AddCode(u32 method, u32 data);
// Clear the code associated with a method.
void ClearCode(u32 method);
// Compiles the macro if it's not in the cache, and executes the compiled macro
void Execute(u32 method, const std::vector<u32>& parameters);

View File

@@ -23,7 +23,8 @@ MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255
namespace Tegra {
namespace {
constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp;
constexpr Xbyak::Reg32 RESULT = Xbyak::util::r10d;
constexpr Xbyak::Reg64 MAX_PARAMETER = Xbyak::util::r11;
constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
@@ -31,6 +32,7 @@ constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
STATE,
RESULT,
MAX_PARAMETER,
PARAMETERS,
METHOD_ADDRESS,
BRANCH_HOLDER,
@@ -80,7 +82,7 @@ private:
u32 carry_flag{};
};
static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
using ProgramType = void (*)(JITState*, const u32*);
using ProgramType = void (*)(JITState*, const u32*, const u32*);
struct OptimizerState {
bool can_skip_carry{};
@@ -112,7 +114,7 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
JITState state{};
state.maxwell3d = &maxwell3d;
state.registers = {};
program(&state, parameters.data());
program(&state, parameters.data(), parameters.data() + parameters.size());
}
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
@@ -488,6 +490,7 @@ void MacroJITx64Impl::Compile() {
// JIT state
mov(STATE, Common::X64::ABI_PARAM1);
mov(PARAMETERS, Common::X64::ABI_PARAM2);
mov(MAX_PARAMETER, Common::X64::ABI_PARAM3);
xor_(RESULT, RESULT);
xor_(METHOD_ADDRESS, METHOD_ADDRESS);
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
@@ -598,7 +601,22 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
return true;
}
/// Logs a critical message about a macro parameter access at `parameter`.
/// `max_parameter` is the address one u32 past the last parameter, so the last parameter's
/// address is reported as `max_parameter - sizeof(u32)`.
static void WarnInvalidParameter(uintptr_t parameter, uintptr_t max_parameter) {
    const auto last_parameter{max_parameter - sizeof(u32)};
    LOG_CRITICAL(HW_GPU,
                 "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)",
                 parameter, last_parameter);
}
Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
Xbyak::Label parameter_ok{};
cmp(PARAMETERS, MAX_PARAMETER);
jb(parameter_ok, T_NEAR);
Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(Common::X64::ABI_PARAM1, PARAMETERS);
mov(Common::X64::ABI_PARAM2, MAX_PARAMETER);
Common::X64::CallFarFunction(*this, &WarnInvalidParameter);
Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
L(parameter_ok);
mov(eax, dword[PARAMETERS]);
add(PARAMETERS, sizeof(u32));
return eax;

View File

@@ -322,6 +322,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
};
host_info = Shader::HostTranslateInfo{
.support_float16 = device.IsFloat16Supported(),
.support_int16 = device.IsShaderInt16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
.needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,

View File

@@ -325,6 +325,8 @@ const char* ToString(VkResult result) noexcept {
return "VK_PIPELINE_COMPILE_REQUIRED_EXT";
case VkResult::VK_RESULT_MAX_ENUM:
return "VK_RESULT_MAX_ENUM";
case VkResult::VK_ERROR_COMPRESSION_EXHAUSTED_EXT:
return "VK_ERROR_COMPRESSION_EXHAUSTED_EXT";
}
return "Unknown";
}

View File

@@ -53,6 +53,8 @@ void ConfigureDebug::SetConfiguration() {
ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue());
ui->dump_shaders->setEnabled(runtime_lock);
ui->dump_shaders->setChecked(Settings::values.dump_shaders.GetValue());
ui->dump_macros->setEnabled(runtime_lock);
ui->dump_macros->setChecked(Settings::values.dump_macros.GetValue());
ui->disable_macro_jit->setEnabled(runtime_lock);
ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue());
ui->disable_loop_safety_checks->setEnabled(runtime_lock);
@@ -83,6 +85,7 @@ void ConfigureDebug::ApplyConfiguration() {
Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked();
Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked();
Settings::values.dump_shaders = ui->dump_shaders->isChecked();
Settings::values.dump_macros = ui->dump_macros->isChecked();
Settings::values.disable_shader_loop_safety_checks =
ui->disable_loop_safety_checks->isChecked();
Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked();

View File

@@ -118,6 +118,19 @@
</property>
</widget>
</item>
<item row="0" column="2">
<widget class="QCheckBox" name="dump_macros">
<property name="enabled">
<bool>true</bool>
</property>
<property name="toolTip">
<string>When checked, it will dump all the macro programs of the GPU</string>
</property>
<property name="text">
<string>Dump Maxwell Macros</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QCheckBox" name="disable_macro_jit">
<property name="enabled">