Compare commits

...

18 Commits

Author SHA1 Message Date
Fernando Sahmkow
11f4e739bd Shader_Ir: Implement F16 Variants of F2F, F2I, I2F.
This commit takes care of implementing the F16 Variants of the 
conversion instructions and makes sure conversions are done.
2019-07-20 17:38:25 -04:00
Fernando Sahmkow
0a67416971 Merge pull request #2693 from ReinUsesLisp/hsetp2
shader/half_set_predicate: Implement missing HSETP2 variants
2019-07-20 17:25:08 -04:00
Flame Sage
369be67039 Update README.md 2019-07-20 19:24:24 +00:00
Flame Sage
aa599ac709 Update README.md 2019-07-20 19:22:30 +00:00
Flame Sage
a2edb27158 Merge pull request #2752 from DarkLordZach/master
azure: Fix clang-format and releases
2019-07-20 15:20:53 -04:00
Zach Hilman
f470bcb826 azure: Fix clang-format and releases 2019-07-20 15:19:25 -04:00
ReinUsesLisp
45c162444d shader/half_set_predicate: Fix HSETP2 implementation 2019-07-19 22:21:22 -03:00
ReinUsesLisp
6c4985edc9 shader/half_set_predicate: Implement missing HSETP2 variants 2019-07-19 22:20:47 -03:00
bunnei
5d369112d9 Merge pull request #2687 from lioncash/tls-process
kernel/process: Allocate the process' TLS region during initialization
2019-07-18 13:53:04 -04:00
bunnei
63bda67a34 Merge pull request #2738 from lioncash/shader-ir
shader-ir: Minor cleanup-related changes
2019-07-18 13:52:01 -04:00
David
d4b95bfc25 Merge pull request #2741 from FernandoS27/trace-log
Kernel: Downgrade WaitForAddress and SignalToAddress messages to Trace.
2019-07-18 13:58:29 +10:00
Fernando Sahmkow
5e457bf258 Kernel: Downgrade WaitForAddress and SignalToAddress messages to Trace.
This messages were originally set as warnning since few games used these
svcs and it was needed for debugging. This is no longer the case.
2019-07-17 22:05:47 -04:00
Fernando Sahmkow
223a535f3f Merge pull request #2740 from lioncash/bra
shader/decode/other: Correct branch indirect argument within BRA handling
2019-07-17 14:25:08 -04:00
Rodrigo Locatti
c3218c110f Merge pull request #2726 from lioncash/access
core: Remove CurrentArmInterface() global accessor
2019-07-17 03:42:16 -03:00
Lioncash
e2d7dda166 shader/decode/other: Correct branch indirect argument within BRA handling
This appears to have been a copy/paste error introduced within
8a6fc529a9
2019-07-16 12:20:45 -04:00
Lioncash
093e5440e2 core: Remove CurrentArmInterface() global accessor
Replaces the final usage of the global accessor function and removes it.
Removes one more enabler of global state.
2019-07-12 21:48:49 -04:00
Lioncash
56c7912159 kernel/process: Allocate the process' TLS region during initialization
Prior to execution within a process beginning, the process establishes
its own TLS region for uses (as far as I can tell) related to exception
handling.

Now that TLS creation was decoupled from threads themselves, we can add
this behavior to our Process class. This is also good, as it allows us
to remove a stub within svcGetInfo, namely querying the address of that
region.
2019-07-07 14:08:28 -04:00
Lioncash
eb6f55d880 kernel/process: Move main thread stack allocation to its own function
Keeps this particular set of behavior isolated to its own function.
2019-07-07 14:08:25 -04:00
16 changed files with 189 additions and 136 deletions

View File

@@ -3,7 +3,7 @@ jobs:
displayName: 'standard'
pool:
vmImage: ubuntu-latest
strategy:
strategy:
maxParallel: 10
matrix:
windows:

View File

@@ -3,7 +3,7 @@ jobs:
displayName: 'testing'
pool:
vmImage: ubuntu-latest
strategy:
strategy:
maxParallel: 10
matrix:
windows:

View File

@@ -1,29 +0,0 @@
steps:
- task: DownloadPipelineArtifact@2
displayName: 'Download Windows Release'
inputs:
artifactName: 'yuzu-$(BuildName)-windows-mingw'
buildType: 'current'
targetPath: '$(Build.ArtifactStagingDirectory)'
- task: DownloadPipelineArtifact@2
displayName: 'Download Linux Release'
inputs:
artifactName: 'yuzu-$(BuildName)-linux'
buildType: 'current'
targetPath: '$(Build.ArtifactStagingDirectory)'
- task: DownloadPipelineArtifact@2
displayName: 'Download Release Point'
inputs:
artifactName: 'yuzu-$(BuildName)-release-point'
buildType: 'current'
targetPath: '$(Build.ArtifactStagingDirectory)'
- script: echo '##vso[task.setvariable variable=tagcommit]' && cat $(Build.ArtifactStagingDirectory)/tag-commit.sha
displayName: 'Calculate Release Point'
- task: GitHubRelease@0
inputs:
gitHubConnection: $(GitHubReleaseConnectionName)
repositoryName: '$(GitHubReleaseRepoName)'
action: 'create'
target: $(variables.tagcommit)
title: 'yuzu $(BuildName) #$(Build.BuildId)'
assets: '$(Build.ArtifactStagingDirectory)/*'

View File

@@ -2,6 +2,7 @@ yuzu emulator
=============
[![Travis CI Build Status](https://travis-ci.org/yuzu-emu/yuzu.svg?branch=master)](https://travis-ci.org/yuzu-emu/yuzu)
[![AppVeyor CI Build Status](https://ci.appveyor.com/api/projects/status/77k97svb2usreu68?svg=true)](https://ci.appveyor.com/project/bunnei/yuzu)
[![Azure Mainline CI Build Status](https://dev.azure.com/yuzu-emu/yuzu/_apis/build/status/yuzu%20mainline?branchName=master)](https://dev.azure.com/yuzu-emu/yuzu/)
yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/).

View File

@@ -50,11 +50,14 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_
static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
void* user_data) {
auto* const system = static_cast<System*>(user_data);
ARM_Interface::ThreadContext ctx{};
Core::CurrentArmInterface().SaveContext(ctx);
system->CurrentArmInterface().SaveContext(ctx);
ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
ctx.pc, ctx.cpu_registers[30]);
return {};
return false;
}
ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
@@ -65,7 +68,7 @@ ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
uc_hook hook{};
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1));
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1));
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, &system, 0, -1));
if (GDBStub::IsServerEnabled()) {
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1));
last_bkpt_hit = false;

View File

@@ -327,10 +327,6 @@ private:
static System s_instance;
};
inline ARM_Interface& CurrentArmInterface() {
return System::GetInstance().CurrentArmInterface();
}
inline Kernel::Process* CurrentProcess() {
return System::GetInstance().CurrentProcess();
}

View File

@@ -184,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
}
void Process::Run(s32 main_thread_priority, u64 stack_size) {
// The kernel always ensures that the given stack size is page aligned.
main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
// Allocate and map the main thread stack
// TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
// of the user address space.
const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
vm_manager
.MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
0, main_thread_stack_size, MemoryState::Stack)
.Unwrap();
AllocateMainThreadStack(stack_size);
tls_region_address = CreateTLSRegion();
vm_manager.LogLayout();
ChangeStatus(ProcessStatus::Running);
SetupMainThread(*this, kernel, main_thread_priority);
@@ -226,6 +218,9 @@ void Process::PrepareForTermination() {
stop_threads(system.Scheduler(2).GetThreadList());
stop_threads(system.Scheduler(3).GetThreadList());
FreeTLSRegion(tls_region_address);
tls_region_address = 0;
ChangeStatus(ProcessStatus::Exited);
}
@@ -325,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) {
WakeupAllWaitingThreads();
}
void Process::AllocateMainThreadStack(u64 stack_size) {
// The kernel always ensures that the given stack size is page aligned.
main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
// Allocate and map the main thread stack
const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
vm_manager
.MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
0, main_thread_stack_size, MemoryState::Stack)
.Unwrap();
}
} // namespace Kernel

View File

@@ -135,6 +135,11 @@ public:
return mutex;
}
/// Gets the address to the process' dedicated TLS region.
VAddr GetTLSRegionAddress() const {
return tls_region_address;
}
/// Gets the current status of the process
ProcessStatus GetStatus() const {
return status;
@@ -296,6 +301,9 @@ private:
/// a process signal.
void ChangeStatus(ProcessStatus new_status);
/// Allocates the main thread stack for the process, given the stack size in bytes.
void AllocateMainThreadStack(u64 stack_size);
/// Memory manager for this process.
Kernel::VMManager vm_manager;
@@ -358,6 +366,9 @@ private:
/// variable related facilities.
Mutex mutex;
/// Address indicating the location of the process' dedicated TLS region.
VAddr tls_region_address = 0;
/// Random values for svcGetInfo RandomEntropy
std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{};

View File

@@ -843,9 +843,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
return RESULT_SUCCESS;
case GetInfoType::UserExceptionContextAddr:
LOG_WARNING(Kernel_SVC,
"(STUBBED) Attempted to query user exception context address, returned 0");
*result = 0;
*result = process->GetTLSRegionAddress();
return RESULT_SUCCESS;
case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
@@ -1739,8 +1737,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
// Wait for an address (via Address Arbiter)
static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
s64 timeout) {
LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
address, type, value, timeout);
LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address,
type, value, timeout);
// If the passed address is a kernel virtual address, return invalid memory state.
if (Memory::IsKernelVirtualAddress(address)) {
@@ -1762,8 +1760,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
// Signals to an address (via Address Arbiter)
static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
s32 num_to_wake) {
LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
address, type, value, num_to_wake);
LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
address, type, value, num_to_wake);
// If the passed address is a kernel virtual address, return invalid memory state.
if (Memory::IsKernelVirtualAddress(address)) {

View File

@@ -931,8 +931,6 @@ union Instruction {
} csetp;
union {
BitField<35, 4, PredCondition> cond;
BitField<49, 1, u64> h_and;
BitField<6, 1, u64> ftz;
BitField<45, 2, PredOperation> op;
BitField<3, 3, u64> pred3;
@@ -940,9 +938,21 @@ union Instruction {
BitField<43, 1, u64> negate_a;
BitField<44, 1, u64> abs_a;
BitField<47, 2, HalfType> type_a;
BitField<31, 1, u64> negate_b;
BitField<30, 1, u64> abs_b;
BitField<28, 2, HalfType> type_b;
union {
BitField<35, 4, PredCondition> cond;
BitField<49, 1, u64> h_and;
BitField<31, 1, u64> negate_b;
BitField<30, 1, u64> abs_b;
BitField<28, 2, HalfType> type_b;
} reg;
union {
BitField<56, 1, u64> negate_b;
BitField<54, 1, u64> abs_b;
} cbuf;
union {
BitField<49, 4, PredCondition> cond;
BitField<53, 1, u64> h_and;
} cbuf_and_imm;
BitField<42, 1, u64> neg_pred;
BitField<39, 3, u64> pred39;
} hsetp2;
@@ -1008,8 +1018,6 @@ union Instruction {
} f2i;
union {
BitField<8, 2, Register::Size> src_size;
BitField<10, 2, Register::Size> dst_size;
BitField<39, 4, u64> rounding;
// H0, H1 extract for F16 missing
BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
@@ -1548,7 +1556,9 @@ public:
HFMA2_RC,
HFMA2_RR,
HFMA2_IMM_R,
HSETP2_C,
HSETP2_R,
HSETP2_IMM,
HSET2_R,
POPC_C,
POPC_R,
@@ -1831,7 +1841,9 @@ private:
INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"),
INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),

View File

@@ -254,10 +254,6 @@ public:
}
private:
using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
using OperationDecompilersArray =
std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
void DeclareVertex() {
if (stage != ShaderStage::Vertex)
return;
@@ -1126,6 +1122,16 @@ private:
Type::Float);
}
std::string FCastHalf0(Operation operation) {
const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
return fmt::format("({})[0]", op_a);
}
std::string FCastHalf1(Operation operation) {
const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
return fmt::format("({})[1]", op_a);
}
template <Type type>
std::string Min(Operation operation) {
return GenerateBinaryCall(operation, "min", type, type, type);
@@ -1282,6 +1288,11 @@ private:
return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat));
}
std::string HCastFloat(Operation operation) {
const std::string op_a = VisitOperand(operation, 0, Type::Float);
return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a);
}
std::string HUnpack(Operation operation) {
const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)};
const auto value = [&]() -> std::string {
@@ -1400,14 +1411,10 @@ private:
return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
}
std::string LogicalAll2(Operation operation) {
std::string LogicalAnd2(Operation operation) {
return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
}
std::string LogicalAny2(Operation operation) {
return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
}
template <bool with_nan>
std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
@@ -1714,7 +1721,7 @@ private:
return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
}
static constexpr OperationDecompilersArray operation_decompilers = {
static constexpr std::array operation_decompilers = {
&GLSLDecompiler::Assign,
&GLSLDecompiler::Select,
@@ -1726,6 +1733,8 @@ private:
&GLSLDecompiler::Negate<Type::Float>,
&GLSLDecompiler::Absolute<Type::Float>,
&GLSLDecompiler::FClamp,
&GLSLDecompiler::FCastHalf0,
&GLSLDecompiler::FCastHalf1,
&GLSLDecompiler::Min<Type::Float>,
&GLSLDecompiler::Max<Type::Float>,
&GLSLDecompiler::FCos,
@@ -1786,6 +1795,7 @@ private:
&GLSLDecompiler::Absolute<Type::HalfFloat>,
&GLSLDecompiler::HNegate,
&GLSLDecompiler::HClamp,
&GLSLDecompiler::HCastFloat,
&GLSLDecompiler::HUnpack,
&GLSLDecompiler::HMergeF32,
&GLSLDecompiler::HMergeH0,
@@ -1798,8 +1808,7 @@ private:
&GLSLDecompiler::LogicalXor,
&GLSLDecompiler::LogicalNegate,
&GLSLDecompiler::LogicalPick2,
&GLSLDecompiler::LogicalAll2,
&GLSLDecompiler::LogicalAny2,
&GLSLDecompiler::LogicalAnd2,
&GLSLDecompiler::LogicalLessThan<Type::Float>,
&GLSLDecompiler::LogicalEqual<Type::Float>,
@@ -1863,6 +1872,7 @@ private:
&GLSLDecompiler::WorkGroupId<1>,
&GLSLDecompiler::WorkGroupId<2>,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
std::string GetRegister(u32 index) const {
return GetDeclarationWithSuffix(index, "gpr");

View File

@@ -205,10 +205,6 @@ public:
}
private:
using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
using OperationDecompilersArray =
std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
void AllocateBindings() {
@@ -739,6 +735,16 @@ private:
return {};
}
Id FCastHalf0(Operation operation) {
UNIMPLEMENTED();
return {};
}
Id FCastHalf1(Operation operation) {
UNIMPLEMENTED();
return {};
}
Id HNegate(Operation operation) {
UNIMPLEMENTED();
return {};
@@ -749,6 +755,11 @@ private:
return {};
}
Id HCastFloat(Operation operation) {
UNIMPLEMENTED();
return {};
}
Id HUnpack(Operation operation) {
UNIMPLEMENTED();
return {};
@@ -804,12 +815,7 @@ private:
return {};
}
Id LogicalAll2(Operation operation) {
UNIMPLEMENTED();
return {};
}
Id LogicalAny2(Operation operation) {
Id LogicalAnd2(Operation operation) {
UNIMPLEMENTED();
return {};
}
@@ -1206,7 +1212,7 @@ private:
return {};
}
static constexpr OperationDecompilersArray operation_decompilers = {
static constexpr std::array operation_decompilers = {
&SPIRVDecompiler::Assign,
&SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
@@ -1219,6 +1225,8 @@ private:
&SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
&SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
&SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
&SPIRVDecompiler::FCastHalf0,
&SPIRVDecompiler::FCastHalf1,
&SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
&SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
&SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
@@ -1279,6 +1287,7 @@ private:
&SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
&SPIRVDecompiler::HNegate,
&SPIRVDecompiler::HClamp,
&SPIRVDecompiler::HCastFloat,
&SPIRVDecompiler::HUnpack,
&SPIRVDecompiler::HMergeF32,
&SPIRVDecompiler::HMergeH0,
@@ -1291,8 +1300,7 @@ private:
&SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
&SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
&SPIRVDecompiler::LogicalPick2,
&SPIRVDecompiler::LogicalAll2,
&SPIRVDecompiler::LogicalAny2,
&SPIRVDecompiler::LogicalAnd2,
&SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
&SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
@@ -1357,6 +1365,7 @@ private:
&SPIRVDecompiler::WorkGroupId<1>,
&SPIRVDecompiler::WorkGroupId<2>,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
const VKDevice& device;
const ShaderIR& ir;

View File

@@ -57,7 +57,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
case OpCode::Id::I2F_R:
case OpCode::Id::I2F_C:
case OpCode::Id::I2F_IMM: {
UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
UNIMPLEMENTED_IF(instr.conversion.selector);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in I2F is not implemented");
@@ -82,14 +82,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
if (instr.conversion.dst_size == Register::Size::Short) {
value = Operation(OperationCode::HCastFloat, PRECISE, value);
}
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::F2F_R:
case OpCode::Id::F2F_C:
case OpCode::Id::F2F_IMM: {
UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word);
UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word);
UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in F2F is not implemented");
@@ -107,6 +112,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
}
}();
if (instr.conversion.src_size == Register::Size::Short) {
// TODO: figure where extract is sey in the encoding
value = Operation(OperationCode::FCastHalf0, PRECISE, value);
}
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
value = [&]() {
@@ -124,19 +134,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
default:
UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
static_cast<u32>(instr.conversion.f2f.rounding.Value()));
return Immediate(0);
return value;
}
}();
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
if (instr.conversion.dst_size == Register::Size::Short) {
value = Operation(OperationCode::HCastFloat, PRECISE, value);
}
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::F2I_R:
case OpCode::Id::F2I_C:
case OpCode::Id::F2I_IMM: {
UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in F2I is not implemented");
Node value = [&]() {
@@ -153,6 +168,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
}
}();
if (instr.conversion.src_size == Register::Size::Short) {
// TODO: figure where extract is sey in the encoding
value = Operation(OperationCode::FCastHalf0, PRECISE, value);
}
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
value = [&]() {

View File

@@ -23,38 +23,51 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
Node op_b = [&]() {
switch (opcode->get().GetId()) {
case OpCode::Id::HSETP2_R:
return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
instr.hsetp2.negate_b);
default:
UNREACHABLE();
return Immediate(0);
}
}();
op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b);
// We can't use the constant predicate as destination.
ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));
const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);
Tegra::Shader::PredCondition cond{};
bool h_and{};
Node op_b{};
switch (opcode->get().GetId()) {
case OpCode::Id::HSETP2_C:
cond = instr.hsetp2.cbuf_and_imm.cond;
h_and = instr.hsetp2.cbuf_and_imm.h_and;
op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
break;
case OpCode::Id::HSETP2_IMM:
cond = instr.hsetp2.cbuf_and_imm.cond;
h_and = instr.hsetp2.cbuf_and_imm.h_and;
op_b = UnpackHalfImmediate(instr, true);
break;
case OpCode::Id::HSETP2_R:
cond = instr.hsetp2.reg.cond;
h_and = instr.hsetp2.reg.h_and;
op_b =
UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b,
instr.hsetp2.reg.negate_b),
instr.hsetp2.reg.type_b);
break;
default:
UNREACHABLE();
op_b = Immediate(0);
}
const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
const OperationCode pair_combiner =
instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
const Node first_pred = Operation(pair_combiner, comparison);
const auto Write = [&](u64 dest, Node src) {
SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39));
};
// Set the primary predicate to the result of Predicate OP SecondPredicate
const Node value = Operation(combiner, first_pred, second_pred);
SetPredicate(bb, instr.hsetp2.pred3, value);
if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
// Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred);
SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred));
const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
const u64 first = instr.hsetp2.pred0;
const u64 second = instr.hsetp2.pred3;
if (h_and) {
const Node joined = Operation(OperationCode::LogicalAnd2, comparison);
Write(first, joined);
Write(second, Operation(OperationCode::LogicalNegate, joined));
} else {
Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u)));
Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u)));
}
return pc;

View File

@@ -102,7 +102,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
PRECISE, op_a, Immediate(3));
const Node operand =
Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
branch = Operation(OperationCode::BranchIndirect, convert);
branch = Operation(OperationCode::BranchIndirect, operand);
}
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;

View File

@@ -30,6 +30,8 @@ enum class OperationCode {
FNegate, /// (MetaArithmetic, float a) -> float
FAbsolute, /// (MetaArithmetic, float a) -> float
FClamp, /// (MetaArithmetic, float value, float min, float max) -> float
FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float
FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float
FMin, /// (MetaArithmetic, float a, float b) -> float
FMax, /// (MetaArithmetic, float a, float b) -> float
FCos, /// (MetaArithmetic, float a) -> float
@@ -83,17 +85,18 @@ enum class OperationCode {
UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
UBitCount, /// (MetaArithmetic, uint) -> uint
HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
HAbsolute, /// (f16vec2 a) -> f16vec2
HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
HMergeF32, /// (f16vec2 src) -> float
HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
HPack2, /// (float a, float b) -> f16vec2
HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
HAbsolute, /// (f16vec2 a) -> f16vec2
HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
HMergeF32, /// (f16vec2 src) -> float
HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
HPack2, /// (float a, float b) -> f16vec2
LogicalAssign, /// (bool& dst, bool src) -> void
LogicalAnd, /// (bool a, bool b) -> bool
@@ -101,8 +104,7 @@ enum class OperationCode {
LogicalXor, /// (bool a, bool b) -> bool
LogicalNegate, /// (bool a) -> bool
LogicalPick2, /// (bool2 pair, uint index) -> bool
LogicalAll2, /// (bool2 a) -> bool
LogicalAny2, /// (bool2 a) -> bool
LogicalAnd2, /// (bool2 a) -> bool
LogicalFLessThan, /// (float a, float b) -> bool
LogicalFEqual, /// (float a, float b) -> bool