GPU: Implemented the iadd32i shader instruction.

Merge pull request #555 from Subv/gpu_sysregs
GPU: Convert the gl_InstanceId and gl_VertexID variables to floats when reading from them.
2018-06-12 11:46:45 -05:00 · 2018-06-10 20:55:27 -04:00 · 2018-06-10 13:50:19 -05:00 · 2018-06-10 10:50:38 -04:00 · 2018-06-09 16:19:13 -05:00 · 2018-06-09 15:56:50 -05:00
14 changed files with 253 additions and 102 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -42,3 +42,7 @@ notifications:
  webhooks:
    urls:
      - https://api.yuzu-emu.org/code/travis/notify
+
+cache:
+  directories:
+    - $HOME/.ccache
--- a/.travis/linux/build.sh
+++ b/.travis/linux/build.sh
@@ -1,3 +1,3 @@
 #!/bin/bash -ex

-docker run -v $(pwd):/yuzu ubuntu:18.04 /bin/bash /yuzu/.travis/linux/docker.sh
+docker run -e CCACHE_DIR=/ccache -v $HOME/.ccache:/ccache -v $(pwd):/yuzu ubuntu:18.04 /bin/bash /yuzu/.travis/linux/docker.sh
--- a/.travis/linux/docker.sh
+++ b/.travis/linux/docker.sh
@@ -1,16 +1,18 @@
 #!/bin/bash -ex

 apt-get update
-apt-get install -y build-essential git libqt5opengl5-dev libsdl2-dev libssl-dev python qtbase5-dev wget ninja-build
-
-# Get a recent version of CMake
-wget https://cmake.org/files/v3.10/cmake-3.10.1-Linux-x86_64.sh
-sh cmake-3.10.1-Linux-x86_64.sh --exclude-subdir --prefix=/ --skip-license
+apt-get install --no-install-recommends -y build-essential git libqt5opengl5-dev libsdl2-dev libssl-dev python qtbase5-dev wget cmake ninja-build ccache

 cd /yuzu

+export PATH=/usr/lib/ccache:$PATH
+ln -sf /usr/bin/ccache /usr/lib/ccache/cc
+ln -sf /usr/bin/ccache /usr/lib/ccache/c++
 mkdir build && cd build
+ccache --show-stats > ccache_before
 cmake .. -DYUZU_BUILD_UNICORN=ON -DCMAKE_BUILD_TYPE=Release -G Ninja
 ninja
+ccache --show-stats > ccache_after
+diff -U100 ccache_before ccache_after || true

 ctest -VV -C Release
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -7,8 +7,12 @@ export Qt5_DIR=$(brew --prefix)/opt/qt5
 export UNICORNDIR=$(pwd)/externals/unicorn

 mkdir build && cd build
+export PATH=/usr/local/opt/ccache/libexec:$PATH
+ccache --show-stats > ccache_before
 cmake --version
 cmake .. -DYUZU_BUILD_UNICORN=ON -DCMAKE_BUILD_TYPE=Release
 make -j4
+ccache --show-stats > ccache_after
+diff -U100 ccache_before ccache_after || true

 ctest -VV -C Release
--- a/.travis/macos/deps.sh
+++ b/.travis/macos/deps.sh
@@ -1,5 +1,5 @@
 #!/bin/sh -ex

 brew update
-brew install dylibbundler p7zip qt5 sdl2
+brew install dylibbundler p7zip qt5 sdl2 ccache
 brew outdated cmake || brew upgrade cmake
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -64,6 +64,10 @@ std::string ArrayToString(const u8* data, size_t size, int line_len, bool spaces
    return oss.str();
 }

+std::string StringFromBuffer(const std::vector<u8>& data) {
+    return std::string(data.begin(), std::find(data.begin(), data.end(), '\0'));
+}
+
 // Turns "  hej " into "hej". Also handles tabs.
 std::string StripSpaces(const std::string& str) {
    const size_t s = str.find_first_not_of(" \t\r\n");
--- a/src/common/string_util.h
+++ b/src/common/string_util.h
@@ -21,6 +21,8 @@ std::string ToUpper(std::string str);

 std::string ArrayToString(const u8* data, size_t size, int line_len = 20, bool spaces = true);

+std::string StringFromBuffer(const std::vector<u8>& data);
+
 std::string StripSpaces(const std::string& s);
 std::string StripQuotes(const std::string& s);

--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -4,6 +4,7 @@

 #include <cinttypes>
 #include "common/logging/log.h"
+#include "common/string_util.h"
 #include "core/core.h"
 #include "core/file_sys/directory.h"
 #include "core/file_sys/filesystem.h"
@@ -258,9 +259,7 @@ public:
        IPC::RequestParser rp{ctx};

        auto file_buffer = ctx.ReadBuffer();
-        auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
-
-        std::string name(file_buffer.begin(), end);
+        std::string name = Common::StringFromBuffer(file_buffer);

        u64 mode = rp.Pop<u64>();
        u32 size = rp.Pop<u32>();
@@ -275,9 +274,7 @@ public:
        IPC::RequestParser rp{ctx};

        auto file_buffer = ctx.ReadBuffer();
-        auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
-
-        std::string name(file_buffer.begin(), end);
+        std::string name = Common::StringFromBuffer(file_buffer);

        NGLOG_DEBUG(Service_FS, "called file {}", name);

@@ -289,9 +286,7 @@ public:
        IPC::RequestParser rp{ctx};

        auto file_buffer = ctx.ReadBuffer();
-        auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
-
-        std::string name(file_buffer.begin(), end);
+        std::string name = Common::StringFromBuffer(file_buffer);

        NGLOG_DEBUG(Service_FS, "called directory {}", name);

@@ -305,13 +300,11 @@ public:
        std::vector<u8> buffer;
        buffer.resize(ctx.BufferDescriptorX()[0].Size());
        Memory::ReadBlock(ctx.BufferDescriptorX()[0].Address(), buffer.data(), buffer.size());
-        auto end = std::find(buffer.begin(), buffer.end(), '\0');
-        std::string src_name(buffer.begin(), end);
+        std::string src_name = Common::StringFromBuffer(buffer);

        buffer.resize(ctx.BufferDescriptorX()[1].Size());
        Memory::ReadBlock(ctx.BufferDescriptorX()[1].Address(), buffer.data(), buffer.size());
-        end = std::find(buffer.begin(), buffer.end(), '\0');
-        std::string dst_name(buffer.begin(), end);
+        std::string dst_name = Common::StringFromBuffer(buffer);

        NGLOG_DEBUG(Service_FS, "called file '{}' to file '{}'", src_name, dst_name);

@@ -323,9 +316,7 @@ public:
        IPC::RequestParser rp{ctx};

        auto file_buffer = ctx.ReadBuffer();
-        auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
-
-        std::string name(file_buffer.begin(), end);
+        std::string name = Common::StringFromBuffer(file_buffer);

        auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>());

@@ -349,9 +340,7 @@ public:
        IPC::RequestParser rp{ctx};

        auto file_buffer = ctx.ReadBuffer();
-        auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
-
-        std::string name(file_buffer.begin(), end);
+        std::string name = Common::StringFromBuffer(file_buffer);

        // TODO(Subv): Implement this filter.
        u32 filter_flags = rp.Pop<u32>();
@@ -376,9 +365,7 @@ public:
        IPC::RequestParser rp{ctx};

        auto file_buffer = ctx.ReadBuffer();
-        auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
-
-        std::string name(file_buffer.begin(), end);
+        std::string name = Common::StringFromBuffer(file_buffer);

        NGLOG_DEBUG(Service_FS, "called file {}", name);

--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -318,6 +318,7 @@ public:
            Equation equation_a;
            Factor factor_source_a;
            Factor factor_dest_a;
+            INSERT_PADDING_WORDS(1);
        };

        union {
@@ -432,7 +433,27 @@ public:
                    };
                } rt_control;

-                INSERT_PADDING_WORDS(0xCF);
+                INSERT_PADDING_WORDS(0x31);
+
+                u32 independent_blend_enable;
+
+                INSERT_PADDING_WORDS(0x15);
+
+                struct {
+                    u32 separate_alpha;
+                    Blend::Equation equation_rgb;
+                    Blend::Factor factor_source_rgb;
+                    Blend::Factor factor_dest_rgb;
+                    Blend::Equation equation_a;
+                    Blend::Factor factor_source_a;
+                    INSERT_PADDING_WORDS(1);
+                    Blend::Factor factor_dest_a;
+
+                    u32 enable_common;
+                    u32 enable[NumRenderTargets];
+                } blend;
+
+                INSERT_PADDING_WORDS(0x77);

                struct {
                    u32 tsc_address_high;
@@ -557,9 +578,7 @@ public:

                } vertex_array[NumVertexArrays];

-                Blend blend;
-
-                INSERT_PADDING_WORDS(0x39);
+                Blend independent_blend[NumRenderTargets];

                struct {
                    u32 limit_high;
@@ -722,6 +741,8 @@ ASSERT_REG_POSITION(vertex_buffer, 0x35D);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
 ASSERT_REG_POSITION(rt_control, 0x487);
+ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
+ASSERT_REG_POSITION(blend, 0x4CF);
 ASSERT_REG_POSITION(tsc, 0x557);
 ASSERT_REG_POSITION(tic, 0x55D);
 ASSERT_REG_POSITION(code_address, 0x582);
@@ -729,7 +750,7 @@ ASSERT_REG_POSITION(draw, 0x585);
 ASSERT_REG_POSITION(index_array, 0x5F2);
 ASSERT_REG_POSITION(query, 0x6C0);
 ASSERT_REG_POSITION(vertex_array[0], 0x700);
-ASSERT_REG_POSITION(blend, 0x780);
+ASSERT_REG_POSITION(independent_blend, 0x780);
 ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
 ASSERT_REG_POSITION(shader_config[0], 0x800);
 ASSERT_REG_POSITION(const_buffer, 0x8E0);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -213,10 +213,11 @@ union Instruction {
    BitField<28, 8, Register> gpr28;
    BitField<39, 8, Register> gpr39;
    BitField<48, 16, u64> opcode;
+    BitField<50, 1, u64> saturate_a;

    union {
        BitField<20, 19, u64> imm20_19;
-        BitField<20, 32, u64> imm20_32;
+        BitField<20, 32, s64> imm20_32;
        BitField<45, 1, u64> negate_b;
        BitField<46, 1, u64> abs_a;
        BitField<48, 1, u64> negate_a;
@@ -246,7 +247,7 @@ union Instruction {

        float GetImm20_32() const {
            float result{};
-            u32 imm{static_cast<u32>(imm20_32)};
+            s32 imm{static_cast<s32>(imm20_32)};
            std::memcpy(&result, &imm, sizeof(imm));
            return result;
        }
@@ -259,11 +260,20 @@ union Instruction {
        }
    } alu;

+    union {
+        BitField<48, 1, u64> is_signed;
+    } shift;
+
    union {
        BitField<39, 5, u64> shift_amount;
        BitField<48, 1, u64> negate_b;
        BitField<49, 1, u64> negate_a;
-    } iscadd;
+    } alu_integer;
+
+    union {
+        BitField<54, 1, u64> saturate;
+        BitField<56, 1, u64> negate_a;
+    } iadd32i;

    union {
        BitField<20, 8, u64> shift_position;
@@ -324,6 +334,15 @@ union Instruction {
        BitField<56, 1, u64> neg_imm;
    } fset;

+    union {
+        BitField<39, 3, u64> pred39;
+        BitField<42, 1, u64> neg_pred;
+        BitField<44, 1, u64> bf;
+        BitField<45, 2, PredOperation> op;
+        BitField<48, 1, u64> is_signed;
+        BitField<49, 3, PredCondition> cond;
+    } iset;
+
    union {
        BitField<10, 2, Register::Size> size;
        BitField<12, 1, u64> is_output_signed;
@@ -331,7 +350,6 @@ union Instruction {
        BitField<41, 2, u64> selector;
        BitField<45, 1, u64> negate_a;
        BitField<49, 1, u64> abs_a;
-        BitField<50, 1, u64> saturate_a;

        union {
            BitField<39, 2, F2iRoundingOp> rounding;
@@ -410,6 +428,7 @@ class OpCode {
 public:
    enum class Id {
        KIL,
+        SSY,
        BFE_C,
        BFE_R,
        BFE_IMM,
@@ -434,6 +453,10 @@ public:
        FMUL_R,
        FMUL_IMM,
        FMUL32_IMM,
+        IADD_C,
+        IADD_R,
+        IADD_IMM,
+        IADD32I,
        ISCADD_C, // Scale and Add
        ISCADD_R,
        ISCADD_IMM,
@@ -479,6 +502,9 @@ public:
        ISETP_C,
        ISETP_IMM,
        ISETP_R,
+        ISET_R,
+        ISET_C,
+        ISET_IMM,
        PSETP,
        XMAD_IMM,
        XMAD_CR,
@@ -489,15 +515,17 @@ public:
    enum class Type {
        Trivial,
        Arithmetic,
+        ArithmeticInteger,
+        ArithmeticIntegerImmediate,
        Bfe,
        Logic,
        Shift,
-        ScaledAdd,
        Ffma,
        Flow,
        Memory,
        FloatSet,
        FloatSetPredicate,
+        IntegerSet,
        IntegerSetPredicate,
        PredicateSetPredicate,
        Conversion,
@@ -596,6 +624,7 @@ private:
        std::vector<Matcher> table = {
 #define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
            INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
+            INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
            INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
            INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
            INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
@@ -617,9 +646,13 @@ private:
            INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
            INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
            INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"),
-            INST("0100110000011---", Id::ISCADD_C, Type::ScaledAdd, "ISCADD_C"),
-            INST("0101110000011---", Id::ISCADD_R, Type::ScaledAdd, "ISCADD_R"),
-            INST("0011100-00011---", Id::ISCADD_IMM, Type::ScaledAdd, "ISCADD_IMM"),
+            INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"),
+            INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"),
+            INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"),
+            INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"),
+            INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
+            INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
+            INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
            INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
            INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
            INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -665,6 +698,9 @@ private:
            INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"),
            INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
            INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
+            INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
+            INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
+            INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
            INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
            INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"),
            INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"),
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -218,6 +218,9 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
        ubo.SetFromRegs(gpu.state.shader_stages[stage]);
        std::memcpy(buffer_ptr, &ubo, sizeof(ubo));

+        // Flush the buffer so that the GPU can see the data we just wrote.
+        glFlushMappedBufferRange(GL_ARRAY_BUFFER, buffer_offset, sizeof(ubo));
+
        // Upload uniform data as one UBO per stage
        const GLintptr ubo_offset = buffer_offset;
        copy_buffer(uniform_buffers[stage].handle, ubo_offset,
@@ -346,6 +349,9 @@ void RasterizerOpenGL::DrawArrays() {
    // Sync the viewport
    SyncViewport(surfaces_rect, res_scale);

+    // Sync the blend state registers
+    SyncBlendState();
+
    // TODO(bunnei): Sync framebuffer_scale uniform here
    // TODO(bunnei): Sync scissorbox uniform(s) here

@@ -452,32 +458,7 @@ void RasterizerOpenGL::DrawArrays() {
    }
 }

-void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {
-    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
-    switch (method) {
-    case MAXWELL3D_REG_INDEX(blend.separate_alpha):
-        ASSERT_MSG(false, "unimplemented");
-        break;
-    case MAXWELL3D_REG_INDEX(blend.equation_rgb):
-        state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb);
-        break;
-    case MAXWELL3D_REG_INDEX(blend.factor_source_rgb):
-        state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb);
-        break;
-    case MAXWELL3D_REG_INDEX(blend.factor_dest_rgb):
-        state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb);
-        break;
-    case MAXWELL3D_REG_INDEX(blend.equation_a):
-        state.blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a);
-        break;
-    case MAXWELL3D_REG_INDEX(blend.factor_source_a):
-        state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a);
-        break;
-    case MAXWELL3D_REG_INDEX(blend.factor_dest_a):
-        state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a);
-        break;
-    }
-}
+void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}

 void RasterizerOpenGL::FlushAll() {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
@@ -757,14 +738,21 @@ void RasterizerOpenGL::SyncDepthOffset() {
    UNREACHABLE();
 }

-void RasterizerOpenGL::SyncBlendEnabled() {
-    UNREACHABLE();
-}
+void RasterizerOpenGL::SyncBlendState() {
+    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
+    ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");

-void RasterizerOpenGL::SyncBlendFuncs() {
-    UNREACHABLE();
-}
+    // TODO(Subv): Support more than just render target 0.
+    state.blend.enabled = regs.blend.enable[0] != 0;

-void RasterizerOpenGL::SyncBlendColor() {
-    UNREACHABLE();
+    if (!state.blend.enabled)
+        return;
+
+    ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented");
+    state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb);
+    state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb);
+    state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_rgb);
+    state.blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_a);
+    state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_a);
+    state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_a);
 }
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -121,14 +121,8 @@ private:
    /// Syncs the depth offset to match the guest state
    void SyncDepthOffset();

-    /// Syncs the blend enabled status to match the guest state
-    void SyncBlendEnabled();
-
-    /// Syncs the blend functions to match the guest state
-    void SyncBlendFuncs();
-
-    /// Syncs the blend color to match the guest state
-    void SyncBlendColor();
+    /// Syncs the blend state to match the guest state
+    void SyncBlendState();

    bool has_ARB_buffer_storage;
    bool has_ARB_direct_state_access;
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -1033,8 +1033,11 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
    params.addr = config.tic.Address();
    params.is_tiled = config.tic.IsTiled();
    params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
-    params.width = config.tic.Width() / params.GetCompresssionFactor();
-    params.height = config.tic.Height() / params.GetCompresssionFactor();
+
+    params.width = Common::AlignUp(config.tic.Width(), params.GetCompresssionFactor()) /
+                   params.GetCompresssionFactor();
+    params.height = Common::AlignUp(config.tic.Height(), params.GetCompresssionFactor()) /
+                    params.GetCompresssionFactor();

    // TODO(Subv): Different types per component are not supported.
    ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() &&
@@ -1045,6 +1048,8 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu

    if (config.tic.IsTiled()) {
        params.block_height = config.tic.BlockHeight();
+        params.width = Common::AlignUp(params.width, params.block_height);
+        params.height = Common::AlignUp(params.height, params.block_height);
    } else {
        // Use the texture-provided stride value if the texture isn't tiled.
        params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch()));
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -538,7 +538,7 @@ private:
            // vertex shader, and what's the value of the fourth element when inside a Tess Eval
            // shader.
            ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
-            return "vec4(0, 0, gl_InstanceID, gl_VertexID)";
+            return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))";
        default:
            const u32 index{static_cast<u32>(attribute) -
                            static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -808,6 +808,8 @@ private:
            case OpCode::Id::FMUL_C:
            case OpCode::Id::FMUL_R:
            case OpCode::Id::FMUL_IMM: {
+                ASSERT_MSG(!instr.saturate_a, "Unimplemented");
+
                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, instr.alu.abs_d);
                break;
            }
@@ -821,10 +823,14 @@ private:
            case OpCode::Id::FADD_C:
            case OpCode::Id::FADD_R:
            case OpCode::Id::FADD_IMM: {
+                ASSERT_MSG(!instr.saturate_a, "Unimplemented");
+
                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, instr.alu.abs_d);
                break;
            }
            case OpCode::Id::MUFU: {
+                ASSERT_MSG(!instr.saturate_a, "Unimplemented");
+
                switch (instr.sub_op) {
                case SubOp::Cos:
                    regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
@@ -973,6 +979,19 @@ private:
            }

            switch (opcode->GetId()) {
+            case OpCode::Id::SHR_C:
+            case OpCode::Id::SHR_R:
+            case OpCode::Id::SHR_IMM: {
+                if (!instr.shift.is_signed) {
+                    // Logical shift right
+                    op_a = "uint(" + op_a + ')';
+                }
+
+                // Cast to int is superfluous for arithmetic shift, it's only for a logical shift
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')',
+                                          1, 1);
+                break;
+            }
            case OpCode::Id::SHL_C:
            case OpCode::Id::SHL_R:
            case OpCode::Id::SHL_IMM:
@@ -986,13 +1005,34 @@ private:
            break;
        }

-        case OpCode::Type::ScaledAdd: {
+        case OpCode::Type::ArithmeticIntegerImmediate: {
            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);

-            if (instr.iscadd.negate_a)
+            if (instr.iadd32i.negate_a)
                op_a = '-' + op_a;

-            std::string op_b = instr.iscadd.negate_b ? "-" : "";
+            std::string op_b = '(' + std::to_string(instr.alu.imm20_32.Value()) + ')';
+
+            switch (opcode->GetId()) {
+            case OpCode::Id::IADD32I:
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
+                                          instr.iadd32i.saturate != 0);
+                break;
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled ArithmeticIntegerImmediate instruction: {}",
+                               opcode->GetName());
+                UNREACHABLE();
+            }
+            }
+            break;
+        }
+        case OpCode::Type::ArithmeticInteger: {
+            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
+
+            if (instr.alu_integer.negate_a)
+                op_a = '-' + op_a;
+
+            std::string op_b = instr.alu_integer.negate_b ? "-" : "";

            if (instr.is_b_imm) {
                op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
@@ -1005,13 +1045,35 @@ private:
                }
            }

-            std::string shift = std::to_string(instr.iscadd.shift_amount.Value());
+            switch (opcode->GetId()) {
+            case OpCode::Id::IADD_C:
+            case OpCode::Id::IADD_R:
+            case OpCode::Id::IADD_IMM: {
+                ASSERT_MSG(!instr.saturate_a, "Unimplemented");
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1);
+                break;
+            }
+            case OpCode::Id::ISCADD_C:
+            case OpCode::Id::ISCADD_R:
+            case OpCode::Id::ISCADD_IMM: {
+                std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
+
+                regs.SetRegisterToInteger(instr.gpr0, true, 0,
+                                          "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
+                break;
+            }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
+                               opcode->GetName());
+                UNREACHABLE();
+            }
+            }

-            regs.SetRegisterToInteger(instr.gpr0, true, 0,
-                                      "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
            break;
        }
        case OpCode::Type::Ffma: {
+            ASSERT_MSG(!instr.saturate_a, "Unimplemented");
+
            std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
            std::string op_b = instr.ffma.negate_b ? "-" : "";
            std::string op_c = instr.ffma.negate_c ? "-" : "";
@@ -1051,7 +1113,7 @@ private:
        case OpCode::Type::Conversion: {
            ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented");
            ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented");
-            ASSERT_MSG(!instr.conversion.saturate_a, "Unimplemented");
+            ASSERT_MSG(!instr.saturate_a, "Unimplemented");

            switch (opcode->GetId()) {
            case OpCode::Id::I2I_R: {
@@ -1081,6 +1143,8 @@ private:
                break;
            }
            case OpCode::Id::F2F_R: {
+                ASSERT_MSG(!instr.saturate_a, "Unimplemented");
+
                std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);

                switch (instr.conversion.f2f.rounding) {
@@ -1198,8 +1262,8 @@ private:
                const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
                const std::string sampler = GetSampler(instr.sampler);
                const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
-                // Add an extra scope and declare the texture coords inside to prevent overwriting
-                // them in case they are used as outputs of the texs instruction.
+                // Add an extra scope and declare the texture coords inside to prevent
+                // overwriting them in case they are used as outputs of the texs instruction.
                shader.AddLine("{");
                ++shader.scope;
                shader.AddLine(coord);
@@ -1230,8 +1294,8 @@ private:
                shader.AddLine(coord);
                const std::string texture = "texture(" + sampler + ", coords)";

-                // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA goes
-                // into gpr28+0 and gpr28+1
+                // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA
+                // goes into gpr28+0 and gpr28+1
                size_t offset{};

                for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) {
@@ -1380,8 +1444,8 @@ private:
                op_b = "abs(" + op_b + ')';
            }

-            // The fset instruction sets a register to 1.0 if the condition is true, and to 0
-            // otherwise.
+            // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
+            // condition is true, and to 0 otherwise.
            std::string second_pred =
                GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);

@@ -1399,6 +1463,41 @@ private:
            }
            break;
        }
+        case OpCode::Type::IntegerSet: {
+            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
+
+            std::string op_b;
+
+            if (instr.is_b_imm) {
+                op_b = std::to_string(instr.alu.GetSignedImm20_20());
+            } else {
+                if (instr.is_b_gpr) {
+                    op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, instr.iset.is_signed);
+                } else {
+                    op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+                                           GLSLRegister::Type::Integer);
+                }
+            }
+
+            // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
+            // condition is true, and to 0 otherwise.
+            std::string second_pred =
+                GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
+
+            std::string comparator = GetPredicateComparison(instr.iset.cond);
+            std::string combiner = GetPredicateCombiner(instr.iset.op);
+
+            std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
+                                    combiner + " (" + second_pred + "))";
+
+            if (instr.iset.bf) {
+                regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
+            } else {
+                regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
+                                          1);
+            }
+            break;
+        }
        default: {
            switch (opcode->GetId()) {
            case OpCode::Id::EXIT: {
@@ -1412,8 +1511,8 @@ private:

                shader.AddLine("return true;");
                if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
-                    // If this is an unconditional exit then just end processing here, otherwise we
-                    // have to account for the possibility of the condition not being met, so
+                    // If this is an unconditional exit then just end processing here, otherwise
+                    // we have to account for the possibility of the condition not being met, so
                    // continue processing the next instruction.
                    offset = PROGRAM_END - 1;
                }
@@ -1435,6 +1534,11 @@ private:
                regs.SetRegisterToInputAttibute(instr.gpr0, attribute.element, attribute.index);
                break;
            }
+            case OpCode::Id::SSY: {
+                // The SSY opcode tells the GPU where to re-converge divergent execution paths, we
+                // can ignore this when generating GLSL code.
+                break;
+            }
            default: {
                NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
                UNREACHABLE();
Author	SHA1	Message	Date
Subv	db0497b808	GPU: Implemented the iadd32i shader instruction.	2018-06-12 11:46:45 -05:00
bunnei	09b8a16414	Merge pull request #555 from Subv/gpu_sysregs GPU: Convert the gl_InstanceId and gl_VertexID variables to floats when reading from them.	2018-06-10 20:55:27 -04:00
Subv	004b1b3830	GPU: Convert the gl_InstanceId and gl_VertexID variables to floats when reading from them. This corrects the invalid position values in some games when doing attribute-less rendering.	2018-06-10 13:50:19 -05:00
bunnei	281fd881a0	Merge pull request #553 from Subv/iset GPU: Implement the ISET family of shader instructions.	2018-06-10 10:50:38 -04:00
Subv	b366b885a1	GPU: Implement the iset family of shader instructions.	2018-06-09 16:19:13 -05:00
Subv	3cb753eeb1	GPU: Added decodings for the ISET family of instructions.	2018-06-09 15:56:50 -05:00
bunnei	d81aaa3ed3	Merge pull request #550 from Subv/ssy GPU: Stub the SSY shader instruction.	2018-06-09 00:42:53 -04:00
bunnei	e2176dc7ce	Merge pull request #551 from bunnei/shr gl_shader_decompiler: Implement SHR instruction.	2018-06-09 00:42:44 -04:00
bunnei	174c22e5f6	Merge pull request #549 from bunnei/iadd gl_shader_decompiler: Implement IADD instruction.	2018-06-09 00:34:03 -04:00
bunnei	5440b9c634	gl_shader_decompiler: Implement SHR instruction.	2018-06-09 00:01:17 -04:00
Subv	abec5f82e2	GPU: Stub the SSY shader instruction. This instruction tells the GPU where the flow reconverges in a non-uniform control flow scenario, we can ignore this when generating GLSL code.	2018-06-08 22:46:10 -05:00
bunnei	bbc4f369ed	gl_shader_decompiler: Implement IADD instruction.	2018-06-08 23:25:22 -04:00
bunnei	79e9c2e237	gl_shader_decompiler: Add missing asserts for saturate_a instructions.	2018-06-08 23:24:10 -04:00
bunnei	83517cb53a	Merge pull request #505 from janisozaur/ccache-travis Enable ccache usage on Travis	2018-06-08 18:51:59 -04:00
bunnei	9949e4d508	Merge pull request #533 from mailwl/array-to-buffer Common/string_util: add StringFromBuffer() function	2018-06-08 18:51:00 -04:00
bunnei	c116b220e9	Merge pull request #548 from Subv/blend GPU: Fixed ghosting when drawing with blending disabled	2018-06-08 18:48:12 -04:00
Subv	c011b6f67e	GPU: Synchronize the blend state on every draw call. Only independent blending on render target 0 is implemented for now. This fixes the elongated squids in Splatoon 2's boot screen.	2018-06-08 17:05:52 -05:00
Subv	c712dafaee	GPU: Added registers for normal and independent blending.	2018-06-08 17:04:41 -05:00
bunnei	a931cf9e8b	Merge pull request #547 from Subv/compressed_alignment GLCache: Align compressed texture sizes to their compression ratio, and then align that compressed size to the block height for tiled textures.	2018-06-08 16:40:49 -04:00
bunnei	a941a94148	Merge pull request #546 from Subv/flush_ubo_buffer Rasterizer: Flush the written region when writing shader uniform data before copying it to the uniform buffers.	2018-06-08 16:39:55 -04:00
Subv	8d9534d830	GLCache: Align compressed texture sizes to their compression ratio, and then align that compressed size to the block height for tiled textures. This fixes issues with retrieving non-block-aligned tiled compressed textures from the cache.	2018-06-08 12:27:19 -05:00
Subv	47dc5e0dab	Rasterizer: Flush the written region when writing shader uniform data before copying it to the uniform buffers. This fixes the flip_viewport uniform having invalid values when drawing.	2018-06-08 12:22:39 -05:00
Michał Janiszewski	f3885845fc	Cache ccache on Travis	2018-06-07 21:43:33 +02:00
Michał Janiszewski	c0d3e2da4e	Add ccache support for macOS on Travis	2018-06-07 21:43:33 +02:00
Michał Janiszewski	517112f549	Add ccache support for Linux on Travis	2018-06-07 21:43:32 +02:00
Michał Janiszewski	6324d86c71	Install cmake from repositories for Ubuntu; Ubuntu 18.04 already has cmake 3.10.2	2018-06-07 21:42:12 +02:00
mailwl	a2efb1dd48	Common/string_util: add StringFromBuffer function; convert input buffer (std::vector<u8>) to string, stripping zero chars	2018-06-07 09:59:47 +03:00