Compare commits

..

7 Commits

Author SHA1 Message Date
Subv
2933521a08 GPU: Use the bf bit in FSET to determine whether to write 0xFFFFFFFF or 1.0f. 2018-06-04 16:41:28 -05:00
Subv
f6679ce422 GPU: Corrected the I2F_R implementation. 2018-06-04 16:41:27 -05:00
bunnei
049ce242a4 Merge pull request #499 from bunnei/am-stuff
am: Implement CreateStorage, PushInData, etc.
2018-06-03 23:43:52 -04:00
bunnei
ba117854f9 Merge pull request #500 from Subv/long_queries
GPU: Partial implementation of long GPU queries.
2018-06-03 21:24:50 -04:00
bunnei
527c098ff6 Merge pull request #498 from bunnei/texs-mask
gl_shader_decompiler: Implement TEXS component mask.
2018-06-03 21:22:12 -04:00
Subv
d57333406d GPU: Partial implementation of long GPU queries.
Long queries write a 128-bit result value to memory, which consists of a 64 bit query value and a 64 bit timestamp.

In this implementation, only select=Zero of the Crop unit is implemented, this writes the query sequence as a 64 bit value, and a 0u64 value for the timestamp, since we emulate an infinitely fast GPU.

This specific type was hwtested, but more rigorous tests should be performed in the future for the other types.
2018-06-03 19:17:31 -05:00
bunnei
1efcba346a gl_shader_decompiler: Implement TEXS component mask. 2018-06-03 12:08:17 -04:00
3 changed files with 69 additions and 22 deletions

View File

@@ -156,16 +156,15 @@ void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
"Units other than CROP are unimplemented");
ASSERT_MSG(regs.query.query_get.short_query,
"Writing the entire query result structure is unimplemented");
u32 value = Memory::Read32(*address);
u32 result = 0;
u64 result = 0;
// TODO(Subv): Support the other query variables
switch (regs.query.query_get.select) {
case Regs::QuerySelect::Zero:
result = 0;
// This seems to actually write the query sequence to the query address.
result = regs.query.query_sequence;
break;
default:
UNIMPLEMENTED_MSG("Unimplemented query select type {}",
@@ -174,15 +173,31 @@ void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Research and implement how query sync conditions work.
struct LongQueryResult {
u64_le value;
u64_le timestamp;
};
static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
switch (regs.query.query_get.mode) {
case Regs::QueryMode::Write:
case Regs::QueryMode::Write2: {
// Write the current query sequence to the sequence address.
u32 sequence = regs.query.query_sequence;
Memory::Write32(*address, sequence);
// TODO(Subv): Write the proper query response structure to the address when not using short
// mode.
if (regs.query.query_get.short_query) {
// Write the current query sequence to the sequence address.
// TODO(Subv): Find out what happens if you use a long query type but mark it as a short
// query.
Memory::Write32(*address, sequence);
} else {
// Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
// GPU, this command may actually take a while to complete in real hardware due to GPU
// wait queues.
LongQueryResult query_result{};
query_result.value = result;
// TODO(Subv): Generate a real GPU timestamp and write it here instead of 0
query_result.timestamp = 0;
Memory::WriteBlock(*address, &query_result, sizeof(query_result));
}
break;
}
default:

View File

@@ -245,9 +245,9 @@ union Instruction {
BitField<44, 1, u64> abs_b;
BitField<45, 2, PredOperation> op;
BitField<48, 4, PredCondition> cond;
BitField<52, 1, u64> bf;
BitField<53, 1, u64> neg_b;
BitField<54, 1, u64> abs_a;
BitField<52, 1, u64> bf;
BitField<55, 1, u64> ftz;
BitField<56, 1, u64> neg_imm;
} fset;
@@ -270,8 +270,22 @@ union Instruction {
} tex;
union {
// TODO(bunnei): This is just a guess, needs to be verified
BitField<52, 1, u64> enable_g_component;
BitField<50, 3, u64> component_mask_selector;
BitField<28, 8, Register> gpr28;
bool HasTwoDestinations() const {
return gpr28.Value() != Register::ZeroIndex;
}
bool IsComponentEnabled(size_t component) const {
static constexpr std::array<size_t, 5> one_dest_mask{0x1, 0x2, 0x4, 0x8, 0x3};
static constexpr std::array<size_t, 5> two_dest_mask{0x7, 0xb, 0xd, 0xe, 0xf};
const auto& mask{HasTwoDestinations() ? two_dest_mask : one_dest_mask};
ASSERT(component_mask_selector < mask.size());
return ((1 << component) & mask[component_mask_selector]) != 0;
}
} texs;
BitField<61, 1, u64> is_b_imm;

View File

@@ -849,8 +849,7 @@ private:
ASSERT_MSG(!instr.conversion.saturate_a, "Unimplemented");
switch (opcode->GetId()) {
case OpCode::Id::I2I_R:
case OpCode::Id::I2F_R: {
case OpCode::Id::I2I_R: {
ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
std::string op_a =
@@ -863,6 +862,17 @@ private:
regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_signed, 0, op_a, 1, 1);
break;
}
case OpCode::Id::I2F_R: {
std::string op_a =
regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed);
if (instr.conversion.abs_a) {
op_a = "abs(" + op_a + ')';
}
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
break;
}
case OpCode::Id::F2F_R: {
std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
@@ -938,18 +948,21 @@ private:
// TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA goes
// into gpr28+0 and gpr28+1
size_t offset{};
for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) {
for (unsigned elem = 0; elem < 2; ++elem) {
if (dest + elem >= Register::ZeroIndex) {
// Skip invalid register values
break;
if (!instr.texs.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
}
regs.SetRegisterToFloat(dest, elem + offset, texture, 1, 4, false, elem);
if (!instr.texs.enable_g_component) {
// Skip the second component
break;
}
}
if (!instr.texs.HasTwoDestinations()) {
// Skip the second destination
break;
}
offset += 2;
}
--shader.scope;
@@ -1053,7 +1066,12 @@ private:
std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
combiner + " (" + second_pred + "))";
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
if (instr.fset.bf) {
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
} else {
regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
1);
}
break;
}
default: {