Compare commits


18 Commits

Author SHA1 Message Date
ameerj
e394e1ecc4 emit_glsl_atomic: Implement 32x2 fallback atomic ops 2022-01-29 19:56:03 -05:00
ameerj
90a0506d56 lower_int64_to_int32: Add 64-bit atomic fallbacks 2022-01-29 19:56:02 -05:00
ameerj
ad58d7eae7 shaders: Add U64->U32x2 Atomic fallback functions 2022-01-29 19:55:53 -05:00
Morph
11099dda2e Merge pull request #7791 from german77/wall_clock
wall_clock: Use standard wall clock if rtsc frequency is too low
2022-01-28 20:04:24 -05:00
Morph
64a68ccbb4 Merge pull request #7800 from ameerj/spirv-int64-storage
spirv_atomic: Define U32x2 storage buffers for 64-bit storage atomics
2022-01-28 20:03:50 -05:00
ameerj
4790ba7839 spirv_atomic: Define U32x2 storage buffers for 64-bit storage atomics
Some drivers do not support 64-bit atomics and fall back to atomically modifying U32x2 vectors. This change ensures that U32x2 storage vectors are defined in the SPIR-V shader when 64-bit atomics are used.

Fixes a hang on some devices, notably Intel GPUs, when booting Pokemon Legends Arceus
2022-01-28 19:00:04 -05:00
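For context, the fallbacks introduced across these commits share one shape: load the 64-bit value as two 32-bit words, apply the operation non-atomically, store the result, and return the original value (as a real atomic would). A minimal host-side sketch of that shape, with hypothetical names and a plain array standing in for the storage buffer:

#include <cstddef>
#include <cstdint>
#include <utility>

// Illustration only: what the generated shader code effectively does when
// 64-bit atomics are unavailable. Note it is NOT atomic, which is why it is
// only a fallback.
std::pair<uint32_t, uint32_t> FallbackExchange32x2(uint32_t* ssbo, std::size_t word_offset,
                                                   uint32_t lo, uint32_t hi) {
    const std::pair<uint32_t, uint32_t> original{ssbo[word_offset], ssbo[word_offset + 1]};
    ssbo[word_offset] = lo;      // write the new low word
    ssbo[word_offset + 1] = hi;  // write the new high word
    return original;             // atomic ops return the previous value
}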
Morph
1900abde13 Merge pull request #7784 from german77/ds5
input_common: Add DS5 to HD rumble list
2022-01-28 18:36:28 -05:00
Morph
60b5670577 Merge pull request #7787 from bunnei/scheduler-deadlock-fix
hle: kernel: KScheduler: Fix deadlock with core waiting for a thread lock that has migrated.
2022-01-28 18:30:29 -05:00
Morph
b00406c8e4 Merge pull request #7788 from ameerj/stream-buffer-begin
buffer_cache: Reduce stream buffer allocations when expanding from the left
2022-01-28 18:30:01 -05:00
Morph
8dea7fa129 Merge pull request #7786 from ameerj/vmnmx-sel
video_minimum_maximum: Implement src operand selectors
2022-01-28 18:24:56 -05:00
Morph
2241d8c971 Merge pull request #7799 from ameerj/amd-xfb
emit_spirv: Add Xfb execution mode when transform feedback is used
2022-01-28 17:55:17 -05:00
ameerj
beaf7654bb emit_spirv: Add Xfb execution mode when transform feedback is used
Fixes Transform Feedback on Vulkan AMD drivers.
2022-01-28 16:32:48 -05:00
bunnei
0dec42431f Merge pull request #7770 from german77/motion-threshold
input_common: Add option to configure gyro threshold
2022-01-27 15:44:04 -08:00
german77
e4c63d432d wall_clock: use standard wall clock if rtsc frequency is too low 2022-01-27 17:07:52 -06:00
ameerj
f300a1d54b buffer_cache: Reduce stream buffer allocations when expanding from the left
The existing stream buffer optimization accounts for size increases at the end of the allocated buffer.
This adds the same optimization for size increases at the beginning of the buffer, reducing buffer allocations when the same buffer is expanded from the left.
2022-01-27 15:31:43 -05:00
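A minimal sketch of the stream-leap idea described above, with assumed names and a 4 KiB page size for illustration: once a region has been joined often enough to look like a stream buffer, over-allocate on both ends so later growth in either direction stays inside the same buffer.

#include <cstdint>

constexpr uint64_t kPageSize = 4096;        // assumed page size, for illustration only
constexpr uint64_t kStreamLeapPages = 256;  // same factor as the buffer_cache diff below

// Expand the resolved range on both sides; previously only 'end' grew.
void ApplyStreamLeap(uint64_t& begin, uint64_t& end) {
    begin -= kPageSize * kStreamLeapPages;  // new: also grow towards lower addresses
    end += kPageSize * kStreamLeapPages;    // existing: grow towards higher addresses
}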
bunnei
3a1a3dd0db hle: kernel: KScheduler: Fix deadlock with core waiting for a thread lock that has migrated.
- Previously, it was possible for a thread migration to occur from core A to core B.
- Next, core B waits on a guest lock that must be released by a thread queued for core A.
- Meanwhile, core A is still waiting on core B's current thread lock, resulting in a deadlock.
- Fix this by try-locking the thread lock.
- Fixes softlocks in FF8 and Pokemon Legends Arceus.
2022-01-27 12:17:14 -08:00
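A simplified sketch of the try-lock pattern this commit switches to (hypothetical, standalone types; the real code uses the kernel's own guard and scheduler state): failing to take the lock backs out and retries instead of blocking, so a core never waits forever on a lock owned by a thread that has migrated elsewhere.

#include <mutex>

// Returns true if the switch went through; false means "retry the scheduling loop".
bool TrySwitchTo(std::mutex& context_guard, const bool& needs_scheduling) {
    if (!context_guard.try_lock()) {
        return false;  // contended: retry instead of blocking and risking a deadlock
    }
    if (needs_scheduling) {
        context_guard.unlock();  // scheduler state changed while acquiring, back out
        return false;
    }
    // ... perform the context switch while holding the guard ...
    context_guard.unlock();
    return true;
}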
Narr the Reg
fd1cef5616 input_common: Add DS5 to HD rumble list 2022-01-26 21:49:32 -06:00
german77
ebf19616f4 input_common: Add option to configure gyro threshold 2022-01-23 21:54:33 -06:00
21 changed files with 650 additions and 41 deletions

View File

@@ -72,7 +72,9 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
if (caps.invariant_tsc) {
rtsc_frequency = EstimateRDTSCFrequency();
}
if (rtsc_frequency == 0) {
// Fallback to StandardWallClock if rtsc period is higher than a nano second
if (rtsc_frequency <= 1000000000) {
return std::make_unique<StandardWallClock>(emulated_cpu_frequency,
emulated_clock_frequency);
} else {
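As a side note on the new check above: rtsc_frequency is in Hz, so comparing it against 1000000000 is the same as asking whether one TSC tick lasts a nanosecond or longer. A tiny equivalent check, for illustration:

#include <cstdint>

bool ShouldUseStandardClock(uint64_t rtsc_frequency_hz) {
    // tick period in ns = 1e9 / frequency, so period >= 1 ns  <=>  frequency <= 1e9
    return rtsc_frequency_hz <= 1'000'000'000ULL;
}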

View File

@@ -749,6 +749,7 @@ void EmulatedController::SetMotion(const Common::Input::CallbackStatus& callback
raw_status.gyro.y.value,
raw_status.gyro.z.value,
});
emulated.SetGyroThreshold(raw_status.gyro.x.properties.threshold);
emulated.UpdateRotation(raw_status.delta_timestamp);
emulated.UpdateOrientation(raw_status.delta_timestamp);
force_update_motion = raw_status.force_update;

View File

@@ -10,7 +10,7 @@ namespace Core::HID {
MotionInput::MotionInput() {
// Initialize PID constants with default values
SetPID(0.3f, 0.005f, 0.0f);
SetGyroThreshold(0.00005f);
SetGyroThreshold(0.007f);
}
void MotionInput::SetPID(f32 new_kp, f32 new_ki, f32 new_kd) {
@@ -31,7 +31,7 @@ void MotionInput::SetGyroscope(const Common::Vec3f& gyroscope) {
gyro_bias = (gyro_bias * 0.9999f) + (gyroscope * 0.0001f);
}
if (gyro.Length2() < gyro_threshold) {
if (gyro.Length() < gyro_threshold) {
gyro = {};
} else {
only_accelerometer = false;
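A quick consistency check on the two changes above (illustrative only): the old code compared the squared gyro magnitude against 0.00005, while the new code compares the magnitude itself against 0.007, and sqrt(0.00005) is roughly 0.00707, so the default cutoff is effectively unchanged while the threshold is now expressed in linear units that the new configuration option can expose directly.

#include <cassert>
#include <cmath>

int main() {
    const float old_threshold_squared = 0.00005f;  // old default, compared against Length2()
    const float new_threshold = 0.007f;            // new default, compared against Length()
    assert(std::abs(std::sqrt(old_threshold_squared) - new_threshold) < 0.001f);
    return 0;
}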

View File

@@ -258,7 +258,7 @@ private:
private:
constexpr void ClearAffinityBit(u64& affinity, s32 core) {
affinity &= ~(u64(1) << core);
affinity &= ~(UINT64_C(1) << core);
}
constexpr s32 GetNextCore(u64& affinity) {

View File

@@ -710,23 +710,19 @@ void KScheduler::Unload(KThread* thread) {
}
void KScheduler::Reload(KThread* thread) {
LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread ? thread->GetName() : "nullptr");
LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread->GetName());
if (thread) {
ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable.");
Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
cpu_core.LoadContext(thread->GetContext32());
cpu_core.LoadContext(thread->GetContext64());
cpu_core.SetTlsAddress(thread->GetTLSAddress());
cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
cpu_core.ClearExclusiveState();
}
Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
cpu_core.LoadContext(thread->GetContext32());
cpu_core.LoadContext(thread->GetContext64());
cpu_core.SetTlsAddress(thread->GetTLSAddress());
cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
cpu_core.ClearExclusiveState();
}
void KScheduler::SwitchContextStep2() {
// Load context of new thread
Reload(current_thread.load());
Reload(GetCurrentThread());
RescheduleCurrentCore();
}
@@ -735,13 +731,17 @@ void KScheduler::ScheduleImpl() {
KThread* previous_thread = GetCurrentThread();
KThread* next_thread = state.highest_priority_thread;
state.needs_scheduling = false;
state.needs_scheduling.store(false);
// We never want to schedule a null thread, so use the idle thread if we don't have a next.
if (next_thread == nullptr) {
next_thread = idle_thread;
}
if (next_thread->GetCurrentCore() != core_id) {
next_thread->SetCurrentCore(core_id);
}
// We never want to schedule a dummy thread, as these are only used by host threads for locking.
if (next_thread->GetThreadType() == ThreadType::Dummy) {
ASSERT_MSG(false, "Dummy threads should never be scheduled!");
@@ -755,14 +755,8 @@ void KScheduler::ScheduleImpl() {
return;
}
if (next_thread->GetCurrentCore() != core_id) {
next_thread->SetCurrentCore(core_id);
}
current_thread.store(next_thread);
// Update the CPU time tracking variables.
KProcess* const previous_process = system.Kernel().CurrentProcess();
UpdateLastContextSwitchTime(previous_thread, previous_process);
// Save context for previous thread
@@ -770,6 +764,10 @@ void KScheduler::ScheduleImpl() {
std::shared_ptr<Common::Fiber>* old_context;
old_context = &previous_thread->GetHostContext();
// Set the new thread.
current_thread.store(next_thread);
guard.Unlock();
Common::Fiber::YieldTo(*old_context, *switch_fiber);
@@ -797,8 +795,8 @@ void KScheduler::SwitchToCurrent() {
do {
auto next_thread = current_thread.load();
if (next_thread != nullptr) {
next_thread->context_guard.Lock();
if (next_thread->GetRawState() != ThreadState::Runnable) {
const auto locked = next_thread->context_guard.TryLock();
if (state.needs_scheduling.load()) {
next_thread->context_guard.Unlock();
break;
}
@@ -806,6 +804,9 @@ void KScheduler::SwitchToCurrent() {
next_thread->context_guard.Unlock();
break;
}
if (!locked) {
continue;
}
}
auto thread = next_thread ? next_thread : idle_thread;
Common::Fiber::YieldTo(switch_fiber, *thread->GetHostContext());

View File

@@ -109,8 +109,9 @@ public:
bool HasHDRumble() const {
if (sdl_controller) {
return (SDL_GameControllerGetType(sdl_controller.get()) ==
SDL_CONTROLLER_TYPE_NINTENDO_SWITCH_PRO);
const auto type = SDL_GameControllerGetType(sdl_controller.get());
return (type == SDL_CONTROLLER_TYPE_NINTENDO_SWITCH_PRO) ||
(type == SDL_CONTROLLER_TYPE_PS5);
}
return false;
}

View File

@@ -504,9 +504,10 @@ private:
class InputFromMotion final : public Common::Input::InputDevice {
public:
explicit InputFromMotion(PadIdentifier identifier_, int motion_sensor_,
explicit InputFromMotion(PadIdentifier identifier_, int motion_sensor_, float gyro_threshold_,
InputEngine* input_engine_)
: identifier(identifier_), motion_sensor(motion_sensor_), input_engine(input_engine_) {
: identifier(identifier_), motion_sensor(motion_sensor_), gyro_threshold(gyro_threshold_),
input_engine(input_engine_) {
UpdateCallback engine_callback{[this]() { OnChange(); }};
const InputIdentifier input_identifier{
.identifier = identifier,
@@ -525,8 +526,9 @@ public:
const auto basic_motion = input_engine->GetMotion(identifier, motion_sensor);
Common::Input::MotionStatus status{};
const Common::Input::AnalogProperties properties = {
.deadzone = 0.001f,
.deadzone = 0.0f,
.range = 1.0f,
.threshold = gyro_threshold,
.offset = 0.0f,
};
status.accel.x = {.raw_value = basic_motion.accel_x, .properties = properties};
@@ -551,6 +553,7 @@ public:
private:
const PadIdentifier identifier;
const int motion_sensor;
const float gyro_threshold;
int callback_key;
InputEngine* input_engine;
};
@@ -873,9 +876,11 @@ std::unique_ptr<Common::Input::InputDevice> InputFactory::CreateMotionDevice(
if (params.Has("motion")) {
const auto motion_sensor = params.Get("motion", 0);
const auto gyro_threshold = params.Get("threshold", 0.007f);
input_engine->PreSetController(identifier);
input_engine->PreSetMotion(identifier, motion_sensor);
return std::make_unique<InputFromMotion>(identifier, motion_sensor, input_engine.get());
return std::make_unique<InputFromMotion>(identifier, motion_sensor, gyro_threshold,
input_engine.get());
}
const auto deadzone = std::clamp(params.Get("deadzone", 0.15f), 0.0f, 1.0f);

View File

@@ -372,6 +372,8 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 poin
ScalarU32 value);
void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
Register value);
void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
Register value);
void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value);
void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -412,6 +414,24 @@ void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b
ScalarU32 offset, Register value);
void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, Register value);
void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarF32 value);
void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -448,6 +468,17 @@ void EmitGlobalAtomicAnd64(EmitContext& ctx);
void EmitGlobalAtomicOr64(EmitContext& ctx);
void EmitGlobalAtomicXor64(EmitContext& ctx);
void EmitGlobalAtomicExchange64(EmitContext& ctx);
void EmitGlobalAtomicIAdd32x2(EmitContext& ctx);
void EmitGlobalAtomicSMin32x2(EmitContext& ctx);
void EmitGlobalAtomicUMin32x2(EmitContext& ctx);
void EmitGlobalAtomicSMax32x2(EmitContext& ctx);
void EmitGlobalAtomicUMax32x2(EmitContext& ctx);
void EmitGlobalAtomicInc32x2(EmitContext& ctx);
void EmitGlobalAtomicDec32x2(EmitContext& ctx);
void EmitGlobalAtomicAnd32x2(EmitContext& ctx);
void EmitGlobalAtomicOr32x2(EmitContext& ctx);
void EmitGlobalAtomicXor32x2(EmitContext& ctx);
void EmitGlobalAtomicExchange32x2(EmitContext& ctx);
void EmitGlobalAtomicAddF32(EmitContext& ctx);
void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
void EmitGlobalAtomicAddF32x2(EmitContext& ctx);

View File

@@ -311,6 +311,13 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 poin
ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
}
void EmitSharedAtomicExchange32x2([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] ScalarU32 pointer_offset,
[[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarU32 value) {
Atom(ctx, inst, binding, offset, value, "ADD", "U32");
@@ -411,6 +418,62 @@ void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Val
Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
}
void EmitStorageAtomicIAdd32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitStorageAtomicSMin32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitStorageAtomicUMin32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitStorageAtomicSMax32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitStorageAtomicUMax32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitStorageAtomicAnd32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitStorageAtomicOr32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitStorageAtomicXor32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitStorageAtomicExchange32x2([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] ScalarU32 offset,
[[maybe_unused]] Register value) {
throw NotImplementedException("GLASM instruction");
}
void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
ScalarU32 offset, ScalarF32 value) {
Atom(ctx, inst, binding, offset, value, "ADD", "F32");
@@ -537,6 +600,50 @@ void EmitGlobalAtomicExchange64(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicIAdd32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicSMin32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicUMin32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicSMax32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicUMax32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicInc32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicDec32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicAnd32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicOr32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicXor32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicExchange32x2(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}
void EmitGlobalAtomicAddF32(EmitContext&) {
throw NotImplementedException("GLASM instruction");
}

View File

@@ -105,6 +105,13 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_vi
pointer_offset, value, pointer_offset, value);
}
void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
std::string_view value) {
LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, pointer_offset, pointer_offset);
ctx.Add("smem[{}>>2]={}.x;smem[({}+4)>>2]={}.y;", pointer_offset, value, pointer_offset, value);
}
void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) {
ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
@@ -265,6 +272,97 @@ void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Val
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
}
void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
ctx.var_alloc.Consume(offset));
ctx.Add("{}_ssbo{}[{}>>2]+={}.x;{}_ssbo{}[({}>>2)+1]+={}.y;", ctx.stage_name, binding.U32(),
ctx.var_alloc.Consume(offset), value, ctx.stage_name, binding.U32(),
ctx.var_alloc.Consume(offset), value);
}
void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
ctx.var_alloc.Consume(offset));
ctx.Add("for(int "
"i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),int({}[i])));}}",
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), value);
}
void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
ctx.var_alloc.Consume(offset));
ctx.Add("for(int i=0;i<2;++i){{ "
"{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],{}[i]);}}",
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), value);
}
void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
ctx.var_alloc.Consume(offset));
ctx.Add("for(int "
"i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),int({}[i])));}}",
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), value);
}
void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
ctx.var_alloc.Consume(offset));
ctx.Add("for(int i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],{}[i]);}}",
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
binding.U32(), ctx.var_alloc.Consume(offset), value);
}
void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2");
ctx.AddU32x2("{}=uvec2(atomicAnd({}_ssbo{}[{}>>2],{}.x),atomicAnd({}_ssbo{}[({}>>2)+1],{}.y));",
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
}
void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2");
ctx.AddU32x2("{}=uvec2(atomicOr({}_ssbo{}[{}>>2],{}.x),atomicOr({}_ssbo{}[({}>>2)+1],{}.y));",
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
}
void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2");
ctx.AddU32x2("{}=uvec2(atomicXor({}_ssbo{}[{}>>2],{}.x),atomicXor({}_ssbo{}[({}>>2)+1],{}.y));",
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
}
void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) {
LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2");
ctx.AddU32x2("{}=uvec2(atomicExchange({}_ssbo{}[{}>>2],{}.x),atomicExchange({}_ssbo{}[({}>>2)+"
"1],{}.y));",
inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
}
void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value) {
SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd");
@@ -388,6 +486,50 @@ void EmitGlobalAtomicExchange64(EmitContext&) {
throw NotImplementedException("GLSL Instrucion");
}
void EmitGlobalAtomicIAdd32x2(EmitContext&) {
throw NotImplementedException("GLSL Instrucion");
}
void EmitGlobalAtomicSMin32x2(EmitContext&) {
throw NotImplementedException("GLSL Instrucion");
}
void EmitGlobalAtomicUMin32x2(EmitContext&) {
throw NotImplementedException("GLSL Instrucion");
}
void EmitGlobalAtomicSMax32x2(EmitContext&) {
throw NotImplementedException("GLSL Instrucion");
}
void EmitGlobalAtomicUMax32x2(EmitContext&) {
throw NotImplementedException("GLSL Instrucion");
}
void EmitGlobalAtomicInc32x2(EmitContext&) {
throw NotImplementedException("GLSL Instrucion");
}
void EmitGlobalAtomicDec32x2(EmitContext&) {
throw NotImplementedException("GLSL Instrucion");
}
void EmitGlobalAtomicAnd32x2(EmitContext&) {
throw NotImplementedException("GLSL Instrucion");
}
void EmitGlobalAtomicOr32x2(EmitContext&) {
throw NotImplementedException("GLSL Instrucion");
}
void EmitGlobalAtomicXor32x2(EmitContext&) {
throw NotImplementedException("GLSL Instrucion");
}
void EmitGlobalAtomicExchange32x2(EmitContext&) {
throw NotImplementedException("GLSL Instrucion");
}
void EmitGlobalAtomicAddF32(EmitContext&) {
throw NotImplementedException("GLSL Instrucion");
}

View File

@@ -442,6 +442,8 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_vi
std::string_view value);
void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
std::string_view value);
void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
std::string_view value);
void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value);
void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -482,6 +484,24 @@ void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b
const IR::Value& offset, std::string_view value);
void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value);
void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value);
void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value);
void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value);
void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value);
void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value);
void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value);
void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value);
void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value);
void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value);
void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset, std::string_view value);
void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -518,6 +538,17 @@ void EmitGlobalAtomicAnd64(EmitContext& ctx);
void EmitGlobalAtomicOr64(EmitContext& ctx);
void EmitGlobalAtomicXor64(EmitContext& ctx);
void EmitGlobalAtomicExchange64(EmitContext& ctx);
void EmitGlobalAtomicIAdd32x2(EmitContext& ctx);
void EmitGlobalAtomicSMin32x2(EmitContext& ctx);
void EmitGlobalAtomicUMin32x2(EmitContext& ctx);
void EmitGlobalAtomicSMax32x2(EmitContext& ctx);
void EmitGlobalAtomicUMax32x2(EmitContext& ctx);
void EmitGlobalAtomicInc32x2(EmitContext& ctx);
void EmitGlobalAtomicDec32x2(EmitContext& ctx);
void EmitGlobalAtomicAnd32x2(EmitContext& ctx);
void EmitGlobalAtomicOr32x2(EmitContext& ctx);
void EmitGlobalAtomicXor32x2(EmitContext& ctx);
void EmitGlobalAtomicExchange32x2(EmitContext& ctx);
void EmitGlobalAtomicAddF32(EmitContext& ctx);
void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
void EmitGlobalAtomicAddF32x2(EmitContext& ctx);

View File

@@ -387,6 +387,14 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
}
}
void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) {
if (ctx.runtime_info.xfb_varyings.empty()) {
return;
}
ctx.AddCapability(spv::Capability::TransformFeedback);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::Xfb);
}
void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) {
if (info.uses_sampled_1d) {
ctx.AddCapability(spv::Capability::Sampled1D);
@@ -442,9 +450,6 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
if (info.uses_sample_id) {
ctx.AddCapability(spv::Capability::SampleRateShading);
}
if (!ctx.runtime_info.xfb_varyings.empty()) {
ctx.AddCapability(spv::Capability::TransformFeedback);
}
if (info.uses_derivatives) {
ctx.AddCapability(spv::Capability::DerivativeControl);
}
@@ -484,6 +489,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
SetupSignedNanCapabilities(profile, program, ctx, main);
}
SetupCapabilities(profile, program.info, ctx);
SetupTransformFeedbackCapabilities(ctx, main);
PatchPhiNodes(program, ctx);
return ctx.Assemble();
}

View File

@@ -74,7 +74,7 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value&
const auto [scope, semantics]{AtomicArgs(ctx)};
return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
}
LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
binding, offset, sizeof(u32[2]))};
const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
@@ -82,6 +82,17 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value&
ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result));
return original_value;
}
Id StorageAtomicU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
binding, offset, sizeof(u32[2]))};
const Id original_value{ctx.OpLoad(ctx.U32[2], pointer)};
const Id result{(ctx.*non_atomic_func)(ctx.U32[2], value, original_value)};
ctx.OpStore(pointer, result);
return original_value;
}
} // Anonymous namespace
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
@@ -141,7 +152,7 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) {
const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
}
LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
const Id pointer_1{SharedPointer(ctx, offset, 0)};
const Id pointer_2{SharedPointer(ctx, offset, 1)};
const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
@@ -152,6 +163,18 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) {
return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2));
}
Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id offset, Id value) {
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
const Id pointer_1{SharedPointer(ctx, offset, 0)};
const Id pointer_2{SharedPointer(ctx, offset, 1)};
const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)};
const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)};
ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U));
ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U));
return ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2);
}
Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) {
return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd);
@@ -267,7 +290,7 @@ Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const
const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
}
LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
binding, offset, sizeof(u32[2]))};
const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
@@ -275,6 +298,56 @@ Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const
return original;
}
Id EmitStorageAtomicIAdd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) {
return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpIAdd);
}
Id EmitStorageAtomicSMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) {
return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpSMin);
}
Id EmitStorageAtomicUMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) {
return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpUMin);
}
Id EmitStorageAtomicSMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) {
return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpSMax);
}
Id EmitStorageAtomicUMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) {
return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpUMax);
}
Id EmitStorageAtomicAnd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) {
return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseAnd);
}
Id EmitStorageAtomicOr32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) {
return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseOr);
}
Id EmitStorageAtomicXor32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) {
return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseXor);
}
Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding,
const IR::Value& offset, Id value) {
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
binding, offset, sizeof(u32[2]))};
const Id original{ctx.OpLoad(ctx.U32[2], pointer)};
ctx.OpStore(pointer, value);
return original;
}
Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value) {
const Id ssbo{ctx.ssbos[binding.U32()].U32};
@@ -418,6 +491,50 @@ Id EmitGlobalAtomicExchange64(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitGlobalAtomicIAdd32x2(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitGlobalAtomicSMin32x2(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitGlobalAtomicUMin32x2(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitGlobalAtomicSMax32x2(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitGlobalAtomicUMax32x2(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitGlobalAtomicInc32x2(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitGlobalAtomicDec32x2(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitGlobalAtomicAnd32x2(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitGlobalAtomicOr32x2(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitGlobalAtomicXor32x2(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitGlobalAtomicExchange32x2(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
Id EmitGlobalAtomicAddF32(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}

View File

@@ -335,6 +335,7 @@ Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value);
Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value);
Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value);
Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value);
Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id pointer_offset, Id value);
Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value);
Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
@@ -375,6 +376,24 @@ Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::
Id value);
Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value);
Id EmitStorageAtomicIAdd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value);
Id EmitStorageAtomicSMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value);
Id EmitStorageAtomicUMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value);
Id EmitStorageAtomicSMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value);
Id EmitStorageAtomicUMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value);
Id EmitStorageAtomicAnd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value);
Id EmitStorageAtomicOr32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value);
Id EmitStorageAtomicXor32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value);
Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding,
const IR::Value& offset, Id value);
Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value);
Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
@@ -411,6 +430,17 @@ Id EmitGlobalAtomicAnd64(EmitContext& ctx);
Id EmitGlobalAtomicOr64(EmitContext& ctx);
Id EmitGlobalAtomicXor64(EmitContext& ctx);
Id EmitGlobalAtomicExchange64(EmitContext& ctx);
Id EmitGlobalAtomicIAdd32x2(EmitContext& ctx);
Id EmitGlobalAtomicSMin32x2(EmitContext& ctx);
Id EmitGlobalAtomicUMin32x2(EmitContext& ctx);
Id EmitGlobalAtomicSMax32x2(EmitContext& ctx);
Id EmitGlobalAtomicUMax32x2(EmitContext& ctx);
Id EmitGlobalAtomicInc32x2(EmitContext& ctx);
Id EmitGlobalAtomicDec32x2(EmitContext& ctx);
Id EmitGlobalAtomicAnd32x2(EmitContext& ctx);
Id EmitGlobalAtomicOr32x2(EmitContext& ctx);
Id EmitGlobalAtomicXor32x2(EmitContext& ctx);
Id EmitGlobalAtomicExchange32x2(EmitContext& ctx);
Id EmitGlobalAtomicAddF32(EmitContext& ctx);
Id EmitGlobalAtomicAddF16x2(EmitContext& ctx);
Id EmitGlobalAtomicAddF32x2(EmitContext& ctx);

View File

@@ -118,6 +118,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::SharedAtomicXor32:
case Opcode::SharedAtomicExchange32:
case Opcode::SharedAtomicExchange64:
case Opcode::SharedAtomicExchange32x2:
case Opcode::GlobalAtomicIAdd32:
case Opcode::GlobalAtomicSMin32:
case Opcode::GlobalAtomicUMin32:
@@ -138,6 +139,15 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::GlobalAtomicOr64:
case Opcode::GlobalAtomicXor64:
case Opcode::GlobalAtomicExchange64:
case Opcode::GlobalAtomicIAdd32x2:
case Opcode::GlobalAtomicSMin32x2:
case Opcode::GlobalAtomicUMin32x2:
case Opcode::GlobalAtomicSMax32x2:
case Opcode::GlobalAtomicUMax32x2:
case Opcode::GlobalAtomicAnd32x2:
case Opcode::GlobalAtomicOr32x2:
case Opcode::GlobalAtomicXor32x2:
case Opcode::GlobalAtomicExchange32x2:
case Opcode::GlobalAtomicAddF32:
case Opcode::GlobalAtomicAddF16x2:
case Opcode::GlobalAtomicAddF32x2:
@@ -165,6 +175,15 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::StorageAtomicOr64:
case Opcode::StorageAtomicXor64:
case Opcode::StorageAtomicExchange64:
case Opcode::StorageAtomicIAdd32x2:
case Opcode::StorageAtomicSMin32x2:
case Opcode::StorageAtomicUMin32x2:
case Opcode::StorageAtomicSMax32x2:
case Opcode::StorageAtomicUMax32x2:
case Opcode::StorageAtomicAnd32x2:
case Opcode::StorageAtomicOr32x2:
case Opcode::StorageAtomicXor32x2:
case Opcode::StorageAtomicExchange32x2:
case Opcode::StorageAtomicAddF32:
case Opcode::StorageAtomicAddF16x2:
case Opcode::StorageAtomicAddF32x2:

View File

@@ -341,6 +341,7 @@ OPCODE(SharedAtomicOr32, U32, U32,
OPCODE(SharedAtomicXor32, U32, U32, U32, )
OPCODE(SharedAtomicExchange32, U32, U32, U32, )
OPCODE(SharedAtomicExchange64, U64, U32, U64, )
OPCODE(SharedAtomicExchange32x2, U32x2, U32, U32x2, )
OPCODE(GlobalAtomicIAdd32, U32, U64, U32, )
OPCODE(GlobalAtomicSMin32, U32, U64, U32, )
@@ -362,6 +363,15 @@ OPCODE(GlobalAtomicAnd64, U64, U64,
OPCODE(GlobalAtomicOr64, U64, U64, U64, )
OPCODE(GlobalAtomicXor64, U64, U64, U64, )
OPCODE(GlobalAtomicExchange64, U64, U64, U64, )
OPCODE(GlobalAtomicIAdd32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicSMin32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicUMin32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicSMax32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicUMax32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicAnd32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicOr32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicXor32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicExchange32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicAddF32, F32, U64, F32, )
OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, )
OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, )
@@ -390,6 +400,15 @@ OPCODE(StorageAtomicAnd64, U64, U32,
OPCODE(StorageAtomicOr64, U64, U32, U32, U64, )
OPCODE(StorageAtomicXor64, U64, U32, U32, U64, )
OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, )
OPCODE(StorageAtomicIAdd32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicSMin32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicUMin32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicSMax32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicUMax32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicAnd32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicOr32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicXor32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicExchange32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, )
OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, )
OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, )

View File

@@ -360,6 +360,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::GlobalAtomicOr64:
case IR::Opcode::GlobalAtomicXor64:
case IR::Opcode::GlobalAtomicExchange64:
case IR::Opcode::GlobalAtomicIAdd32x2:
case IR::Opcode::GlobalAtomicSMin32x2:
case IR::Opcode::GlobalAtomicUMin32x2:
case IR::Opcode::GlobalAtomicSMax32x2:
case IR::Opcode::GlobalAtomicUMax32x2:
case IR::Opcode::GlobalAtomicAnd32x2:
case IR::Opcode::GlobalAtomicOr32x2:
case IR::Opcode::GlobalAtomicXor32x2:
case IR::Opcode::GlobalAtomicExchange32x2:
case IR::Opcode::GlobalAtomicAddF32:
case IR::Opcode::GlobalAtomicAddF16x2:
case IR::Opcode::GlobalAtomicAddF32x2:
@@ -597,6 +606,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
break;
case IR::Opcode::LoadStorage64:
case IR::Opcode::WriteStorage64:
case IR::Opcode::StorageAtomicIAdd32x2:
case IR::Opcode::StorageAtomicSMin32x2:
case IR::Opcode::StorageAtomicUMin32x2:
case IR::Opcode::StorageAtomicSMax32x2:
case IR::Opcode::StorageAtomicUMax32x2:
case IR::Opcode::StorageAtomicAnd32x2:
case IR::Opcode::StorageAtomicOr32x2:
case IR::Opcode::StorageAtomicXor32x2:
case IR::Opcode::StorageAtomicExchange32x2:
info.used_storage_buffer_types |= IR::Type::U32x2;
break;
case IR::Opcode::LoadStorage128:
@@ -688,7 +706,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::StorageAtomicAnd64:
case IR::Opcode::StorageAtomicOr64:
case IR::Opcode::StorageAtomicXor64:
info.used_storage_buffer_types |= IR::Type::U64;
info.used_storage_buffer_types |= IR::Type::U64 | IR::Type::U32x2;
info.uses_int64_bit_atomics = true;
break;
case IR::Opcode::BindlessImageAtomicIAdd32:

View File

@@ -92,6 +92,15 @@ bool IsGlobalMemory(const IR::Inst& inst) {
case IR::Opcode::GlobalAtomicOr64:
case IR::Opcode::GlobalAtomicXor64:
case IR::Opcode::GlobalAtomicExchange64:
case IR::Opcode::GlobalAtomicIAdd32x2:
case IR::Opcode::GlobalAtomicSMin32x2:
case IR::Opcode::GlobalAtomicUMin32x2:
case IR::Opcode::GlobalAtomicSMax32x2:
case IR::Opcode::GlobalAtomicUMax32x2:
case IR::Opcode::GlobalAtomicAnd32x2:
case IR::Opcode::GlobalAtomicOr32x2:
case IR::Opcode::GlobalAtomicXor32x2:
case IR::Opcode::GlobalAtomicExchange32x2:
case IR::Opcode::GlobalAtomicAddF32:
case IR::Opcode::GlobalAtomicAddF16x2:
case IR::Opcode::GlobalAtomicAddF32x2:
@@ -135,6 +144,15 @@ bool IsGlobalMemoryWrite(const IR::Inst& inst) {
case IR::Opcode::GlobalAtomicOr64:
case IR::Opcode::GlobalAtomicXor64:
case IR::Opcode::GlobalAtomicExchange64:
case IR::Opcode::GlobalAtomicIAdd32x2:
case IR::Opcode::GlobalAtomicSMin32x2:
case IR::Opcode::GlobalAtomicUMin32x2:
case IR::Opcode::GlobalAtomicSMax32x2:
case IR::Opcode::GlobalAtomicUMax32x2:
case IR::Opcode::GlobalAtomicAnd32x2:
case IR::Opcode::GlobalAtomicOr32x2:
case IR::Opcode::GlobalAtomicXor32x2:
case IR::Opcode::GlobalAtomicExchange32x2:
case IR::Opcode::GlobalAtomicAddF32:
case IR::Opcode::GlobalAtomicAddF16x2:
case IR::Opcode::GlobalAtomicAddF32x2:
@@ -199,6 +217,8 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
return IR::Opcode::StorageAtomicOr32;
case IR::Opcode::GlobalAtomicXor32:
return IR::Opcode::StorageAtomicXor32;
case IR::Opcode::GlobalAtomicExchange32:
return IR::Opcode::StorageAtomicExchange32;
case IR::Opcode::GlobalAtomicIAdd64:
return IR::Opcode::StorageAtomicIAdd64;
case IR::Opcode::GlobalAtomicSMin64:
@@ -215,10 +235,26 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
return IR::Opcode::StorageAtomicOr64;
case IR::Opcode::GlobalAtomicXor64:
return IR::Opcode::StorageAtomicXor64;
case IR::Opcode::GlobalAtomicExchange32:
return IR::Opcode::StorageAtomicExchange32;
case IR::Opcode::GlobalAtomicExchange64:
return IR::Opcode::StorageAtomicExchange64;
case IR::Opcode::GlobalAtomicIAdd32x2:
return IR::Opcode::StorageAtomicIAdd32x2;
case IR::Opcode::GlobalAtomicSMin32x2:
return IR::Opcode::StorageAtomicSMin32x2;
case IR::Opcode::GlobalAtomicUMin32x2:
return IR::Opcode::StorageAtomicUMin32x2;
case IR::Opcode::GlobalAtomicSMax32x2:
return IR::Opcode::StorageAtomicSMax32x2;
case IR::Opcode::GlobalAtomicUMax32x2:
return IR::Opcode::StorageAtomicUMax32x2;
case IR::Opcode::GlobalAtomicAnd32x2:
return IR::Opcode::StorageAtomicAnd32x2;
case IR::Opcode::GlobalAtomicOr32x2:
return IR::Opcode::StorageAtomicOr32x2;
case IR::Opcode::GlobalAtomicXor32x2:
return IR::Opcode::StorageAtomicXor32x2;
case IR::Opcode::GlobalAtomicExchange32x2:
return IR::Opcode::StorageAtomicExchange32x2;
case IR::Opcode::GlobalAtomicAddF32:
return IR::Opcode::StorageAtomicAddF32;
case IR::Opcode::GlobalAtomicAddF16x2:
@@ -454,6 +490,15 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
case IR::Opcode::GlobalAtomicOr64:
case IR::Opcode::GlobalAtomicXor64:
case IR::Opcode::GlobalAtomicExchange64:
case IR::Opcode::GlobalAtomicIAdd32x2:
case IR::Opcode::GlobalAtomicSMin32x2:
case IR::Opcode::GlobalAtomicUMin32x2:
case IR::Opcode::GlobalAtomicSMax32x2:
case IR::Opcode::GlobalAtomicUMax32x2:
case IR::Opcode::GlobalAtomicAnd32x2:
case IR::Opcode::GlobalAtomicOr32x2:
case IR::Opcode::GlobalAtomicXor32x2:
case IR::Opcode::GlobalAtomicExchange32x2:
case IR::Opcode::GlobalAtomicAddF32:
case IR::Opcode::GlobalAtomicAddF16x2:
case IR::Opcode::GlobalAtomicAddF32x2:

View File

@@ -199,6 +199,26 @@ void Lower(IR::Block& block, IR::Inst& inst) {
return ShiftRightLogical64To32(block, inst);
case IR::Opcode::ShiftRightArithmetic64:
return ShiftRightArithmetic64To32(block, inst);
case IR::Opcode::SharedAtomicExchange64:
return inst.ReplaceOpcode(IR::Opcode::SharedAtomicExchange32x2);
case IR::Opcode::GlobalAtomicIAdd64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicIAdd32x2);
case IR::Opcode::GlobalAtomicSMin64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMin32x2);
case IR::Opcode::GlobalAtomicUMin64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMin32x2);
case IR::Opcode::GlobalAtomicSMax64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMax32x2);
case IR::Opcode::GlobalAtomicUMax64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMax32x2);
case IR::Opcode::GlobalAtomicAnd64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicAnd32x2);
case IR::Opcode::GlobalAtomicOr64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicOr32x2);
case IR::Opcode::GlobalAtomicXor64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicXor32x2);
case IR::Opcode::GlobalAtomicExchange64:
return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicExchange32x2);
default:
break;
}

View File

@@ -1474,6 +1474,8 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
// When this memory region has been joined a bunch of times, we assume it's being used
// as a stream buffer. Increase the size to skip constantly recreating buffers.
has_stream_leap = true;
begin -= PAGE_SIZE * 256;
cpu_addr = begin;
end += PAGE_SIZE * 256;
}
}

View File

@@ -403,10 +403,22 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
connect(button, &QPushButton::customContextMenuRequested,
[=, this](const QPoint& menu_location) {
QMenu context_menu;
Common::ParamPackage param = emulated_controller->GetMotionParam(motion_id);
context_menu.addAction(tr("Clear"), [&] {
emulated_controller->SetMotionParam(motion_id, {});
motion_map[motion_id]->setText(tr("[not set]"));
});
if (param.Has("motion")) {
context_menu.addAction(tr("Set gyro threshold"), [&] {
const int gyro_threshold =
static_cast<int>(param.Get("threshold", 0.007f) * 1000.0f);
const int new_threshold = QInputDialog::getInt(
this, tr("Set threshold"), tr("Choose a value between 0% and 100%"),
gyro_threshold, 0, 100);
param.Set("threshold", new_threshold / 1000.0f);
emulated_controller->SetMotionParam(motion_id, param);
});
}
context_menu.exec(motion_map[motion_id]->mapToGlobal(menu_location));
});
}
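A worked mapping for the dialog above (assumed values): the input box shows threshold * 1000, so the 0.007 default appears as 7, and the allowed 0..100 range maps back to thresholds between 0.0 and 0.1.

#include <cassert>
#include <cmath>

int main() {
    const float default_threshold = 0.007f;
    const int shown = static_cast<int>(default_threshold * 1000.0f);  // value displayed in the dialog
    assert(shown == 7);
    const int max_input = 100;
    assert(std::abs(max_input / 1000.0f - 0.1f) < 1e-6f);  // upper bound of the configurable threshold
    return 0;
}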