Compare commits

..

6 Commits

Author SHA1 Message Date
Fernando Sahmkow
10682ad7e0 shader_decompiler: Improve Accuracy of Attribute Interpolation. 2019-02-14 03:25:07 -04:00
bunnei
8490e7746a Merge pull request #2115 from lioncash/local
core_timing: Make EmptyTimedCallback a local variable
2019-02-13 21:42:04 -05:00
bunnei
f0c4ac9abd Merge pull request #2116 from lioncash/size
threadsafe_queue: Remove NeedSize template parameter
2019-02-13 21:41:25 -05:00
Lioncash
0829ef97ca threadsafe_queue: Use std::size_t for representing size
Makes it consistent with the regular standard containers in terms of
size representation. This also gets rid of dependence on our own
type aliases, removing the need for an include.
2019-02-12 22:39:53 -05:00
Lioncash
f0bfb24c61 threadsafe_queue: Remove NeedSize template parameter
The necessity of this parameter is dubious at best, and in 2019 probably
offers completely negligible savings as opposed to just leaving this
enabled. This removes it and simplifies the overall interface.
2019-02-12 22:09:51 -05:00
Lioncash
83ba3515ec core_timing: Make EmptyTimedCallback a local variable
Given this is only used in one place, it can be moved closest to its
usage site.
2019-02-12 21:47:18 -05:00
9 changed files with 96 additions and 64 deletions

View File

@@ -7,17 +7,16 @@
// a simple lockless thread-safe,
// single reader, single writer queue
#include <algorithm>
#include <atomic>
#include <cstddef>
#include <mutex>
#include "common/common_types.h"
#include <utility>
namespace Common {
template <typename T, bool NeedSize = true>
template <typename T>
class SPSCQueue {
public:
SPSCQueue() : size(0) {
SPSCQueue() {
write_ptr = read_ptr = new ElementPtr();
}
~SPSCQueue() {
@@ -25,13 +24,12 @@ public:
delete read_ptr;
}
u32 Size() const {
static_assert(NeedSize, "using Size() on FifoQueue without NeedSize");
std::size_t Size() const {
return size.load();
}
bool Empty() const {
return !read_ptr->next.load();
return Size() == 0;
}
T& Front() const {
@@ -47,13 +45,13 @@ public:
ElementPtr* new_ptr = new ElementPtr();
write_ptr->next.store(new_ptr, std::memory_order_release);
write_ptr = new_ptr;
if (NeedSize)
size++;
++size;
}
void Pop() {
if (NeedSize)
size--;
--size;
ElementPtr* tmpptr = read_ptr;
// advance the read pointer
read_ptr = tmpptr->next.load();
@@ -66,8 +64,7 @@ public:
if (Empty())
return false;
if (NeedSize)
size--;
--size;
ElementPtr* tmpptr = read_ptr;
read_ptr = tmpptr->next.load(std::memory_order_acquire);
@@ -89,7 +86,7 @@ private:
// and a pointer to the next ElementPtr
class ElementPtr {
public:
ElementPtr() : next(nullptr) {}
ElementPtr() {}
~ElementPtr() {
ElementPtr* next_ptr = next.load();
@@ -98,21 +95,21 @@ private:
}
T current;
std::atomic<ElementPtr*> next;
std::atomic<ElementPtr*> next{nullptr};
};
ElementPtr* write_ptr;
ElementPtr* read_ptr;
std::atomic<u32> size;
std::atomic_size_t size{0};
};
// a simple thread-safe,
// single reader, multiple writer queue
template <typename T, bool NeedSize = true>
template <typename T>
class MPSCQueue {
public:
u32 Size() const {
std::size_t Size() const {
return spsc_queue.Size();
}
@@ -144,7 +141,7 @@ public:
}
private:
SPSCQueue<T, NeedSize> spsc_queue;
SPSCQueue<T> spsc_queue;
std::mutex write_lock;
};
} // namespace Common

View File

@@ -54,10 +54,10 @@ static std::vector<Event> event_queue;
static u64 event_fifo_id;
// the queue for storing the events from other threads threadsafe until they will be added
// to the event_queue by the emu thread
static Common::MPSCQueue<Event, false> ts_queue;
static Common::MPSCQueue<Event> ts_queue;
// the queue for unscheduling the events from other threads threadsafe
static Common::MPSCQueue<std::pair<const EventType*, u64>, false> unschedule_queue;
static Common::MPSCQueue<std::pair<const EventType*, u64>> unschedule_queue;
constexpr int MAX_SLICE_LENGTH = 20000;
@@ -70,8 +70,6 @@ static bool is_global_timer_sane;
static EventType* ev_lost = nullptr;
static void EmptyTimedCallback(u64 userdata, s64 cyclesLate) {}
EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
// check for existing type with same name.
// we want event type names to remain unique so that we can use them for serialization.
@@ -104,7 +102,9 @@ void Init() {
is_global_timer_sane = true;
event_fifo_id = 0;
ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback);
const auto empty_timed_callback = [](u64, s64) {};
ev_lost = RegisterEvent("_lost_event", empty_timed_callback);
}
void Shutdown() {

View File

@@ -398,8 +398,7 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
}
void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
std::ifstream file;
OpenFStream(file, filename, std::ios_base::in);
std::ifstream file(filename);
if (!file.is_open())
return;

View File

@@ -376,9 +376,9 @@ enum class R2pMode : u64 {
};
enum class IpaInterpMode : u64 {
Linear = 0,
Perspective = 1,
Flat = 2,
Pass = 0,
Multiply = 1,
Constant = 2,
Sc = 3,
};

View File

@@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
TriangleStrip = 7,
};
enum class AttributeUse : u8 {
Unused = 0,
Constant = 1,
Perspective = 2,
ScreenLinear = 3,
};
// Documentation in:
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
struct Header {
@@ -84,9 +91,15 @@ struct Header {
} vtg;
struct {
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
union {
BitField<0, 2, AttributeUse> x;
BitField<2, 2, AttributeUse> y;
BitField<4, 2, AttributeUse> w;
BitField<6, 2, AttributeUse> z;
u8 raw;
} imap_generic_vector[32];
INSERT_PADDING_BYTES(2); // ImapColor
INSERT_PADDING_BYTES(2); // ImapSystemValuesC
INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -103,6 +116,28 @@ struct Header {
const u32 bit = render_target * 4 + component;
return omap.target & (1 << bit);
}
AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
return static_cast<AttributeUse>(
(imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
}
AttributeUse GetAttributeUse(u32 attribute) const {
AttributeUse result = AttributeUse::Unused;
for (u32 i = 0; i < 4; i++) {
const auto index = GetAttributeIndexUse(attribute, i);
if (index == AttributeUse::Unused) {
continue;
}
if (result == AttributeUse::Unused || result == index) {
result = index;
continue;
}
LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
if (index == AttributeUse::Perspective) {
result = index;
}
}
return result;
}
} ps;
};

View File

@@ -20,6 +20,7 @@
namespace OpenGL::GLShader {
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::Header;
using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
@@ -288,34 +289,22 @@ private:
code.AddNewLine();
}
std::string GetInputFlags(const IpaMode& input_mode) {
const IpaSampleMode sample_mode = input_mode.sampling_mode;
const IpaInterpMode interp_mode = input_mode.interpolation_mode;
std::string GetInputFlags(AttributeUse attribute) {
std::string out;
switch (interp_mode) {
case IpaInterpMode::Flat:
switch (attribute) {
case AttributeUse::Constant:
out += "flat ";
break;
case IpaInterpMode::Linear:
case AttributeUse::ScreenLinear:
out += "noperspective ";
break;
case IpaInterpMode::Perspective:
case AttributeUse::Perspective:
// Default, Smooth
break;
default:
UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
}
switch (sample_mode) {
case IpaSampleMode::Centroid:
// It can be implemented with the "centroid " keyword in GLSL
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
break;
case IpaSampleMode::Default:
// Default, n/a
break;
default:
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
UNREACHABLE();
}
return out;
}
@@ -324,16 +313,11 @@ private:
const auto& attributes = ir.GetInputAttributes();
for (const auto element : attributes) {
const Attribute::Index index = element.first;
const IpaMode& input_mode = *element.second.begin();
if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
// Skip when it's not a generic attribute
continue;
}
ASSERT(element.second.size() > 0);
UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
"Multiple input flag modes are not supported in GLSL");
// TODO(bunnei): Use proper number of elements for these
u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
if (stage != ShaderStage::Vertex) {
@@ -345,8 +329,14 @@ private:
if (stage == ShaderStage::Geometry) {
attr = "gs_" + attr + "[]";
}
code.AddLine("layout (location = " + std::to_string(idx) + ") " +
GetInputFlags(input_mode) + "in vec4 " + attr + ';');
std::string suffix;
if (stage == ShaderStage::Fragment) {
const auto input_mode =
header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
suffix = GetInputFlags(input_mode);
}
code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
attr + ';');
}
if (!attributes.empty())
code.AddNewLine();
@@ -1571,4 +1561,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
return {decompiler.GetResult(), decompiler.GetShaderEntries()};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader

View File

@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
layout (location = 0) in vec4 position;
layout (location = 0) in noperspective vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
return {out, program.second};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader

View File

@@ -48,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
Tegra::Shader::IpaSampleMode::Default};
u64 next_element = instr.attribute.fmt20.element;

View File

@@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
instr.ipa.sample_mode.Value()};
const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);
Node value = attr;
const Tegra::Shader::Attribute::Index index = attribute.index.Value();
if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
index <= Tegra::Shader::Attribute::Index::Attribute_31) {
// TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
// In theory by setting them as perspective, OpenGL does the perspective correction.
// A way must figured to reverse the last step of it.
if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
}
}
value = GetSaturatedFloat(value, instr.ipa.saturate);
SetRegister(bb, instr.gpr0, value);
break;
@@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader