gl_shader_decompiler: HACK: Remove some unreachables so games run.

gl_rasterizer_cache: HACK: Pre-swap raw textures before uploading them.
- Temporary fix for Cave Story, will be removed before merging.
2018-04-17 22:42:07 -04:00 · 2018-04-17 22:25:30 -04:00 · 2018-04-17 22:25:29 -04:00 · 2018-04-17 22:23:14 -04:00 · 2018-04-17 22:21:44 -04:00 · 2018-04-17 22:17:07 -04:00
22 changed files with 729 additions and 156 deletions
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -192,11 +192,6 @@ private:
    static_assert(position < 8 * sizeof(T), "Invalid position");
    static_assert(bits <= 8 * sizeof(T), "Invalid number of bits");
    static_assert(bits > 0, "Invalid number of bits");
-    static_assert(std::is_pod<T>::value, "Invalid base type");
+    static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField");
 };
 #pragma pack()
-
-#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
-static_assert(std::is_trivially_copyable<BitField<0, 1, unsigned>>::value,
-              "BitField must be trivially copyable");
-#endif
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -12,6 +12,8 @@ add_library(core STATIC
    file_sys/errors.h
    file_sys/filesystem.cpp
    file_sys/filesystem.h
+    file_sys/partition_filesystem.cpp
+    file_sys/partition_filesystem.h
    file_sys/path_parser.cpp
    file_sys/path_parser.h
    file_sys/program_metadata.cpp
--- a/src/core/file_sys/partition_filesystem.cpp
+++ b/src/core/file_sys/partition_filesystem.cpp
@@ -0,0 +1,125 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cinttypes>
+#include <utility>
+#include "common/file_util.h"
+#include "common/logging/log.h"
+#include "core/file_sys/partition_filesystem.h"
+#include "core/loader/loader.h"
+
+namespace FileSys {
+
+Loader::ResultStatus PartitionFilesystem::Load(const std::string& file_path, size_t offset) {
+    FileUtil::IOFile file(file_path, "rb");
+    if (!file.IsOpen())
+        return Loader::ResultStatus::Error;
+
+    // The file must at least hold a header starting at `offset`
+    if (file.GetSize() < offset + sizeof(Header))
+        return Loader::ResultStatus::Error;
+
+    // For cartridges, HFSs can get very large, so we need to calculate the size up to
+    // the actual content itself instead of just blindly reading in the entire file.
+    // The header lives at `offset`, not at the start of the file, so seek there first.
+    file.Seek(offset, SEEK_SET);
+    Header pfs_header;
+    if (!file.ReadBytes(&pfs_header, sizeof(Header)))
+        return Loader::ResultStatus::Error;
+
+    bool is_hfs = (memcmp(pfs_header.magic.data(), "HFS", 3) == 0);
+    size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
+    size_t metadata_size =
+        sizeof(Header) + (pfs_header.num_entries * entry_size) + pfs_header.strtab_size;
+
+    // Rewind to the partition start and read header + entry table + string table in one go
+    file.Seek(offset, SEEK_SET);
+    std::vector<u8> file_data(metadata_size);
+    if (!file.ReadBytes(file_data.data(), metadata_size))
+        return Loader::ResultStatus::Error;
+
+    Loader::ResultStatus result = Load(file_data);
+    if (result != Loader::ResultStatus::Success)
+        LOG_ERROR(Service_FS, "Failed to load PFS from file %s!", file_path.c_str());
+    return result;
+}
+
+Loader::ResultStatus PartitionFilesystem::Load(const std::vector<u8>& file_data, size_t offset) {
+    // Test `offset` first so the unsigned subtraction cannot wrap for out-of-range offsets
+    if (offset > file_data.size() || file_data.size() - offset < sizeof(Header))
+        return Loader::ResultStatus::Error;
+
+    memcpy(&pfs_header, &file_data[offset], sizeof(Header));
+    is_hfs = (memcmp(pfs_header.magic.data(), "HFS", 3) == 0);
+
+    size_t entries_offset = offset + sizeof(Header);
+    size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
+    size_t strtab_offset = entries_offset + (pfs_header.num_entries * entry_size);
+    // The entry table and string table must lie inside the buffer before we index into it
+    if (strtab_offset + pfs_header.strtab_size > file_data.size())
+        return Loader::ResultStatus::Error;
+    for (u32 i = 0; i < pfs_header.num_entries; i++) { // u32: a u16 counter wraps at 65536
+        FileEntry entry;
+        memcpy(&entry.fs_entry, &file_data[entries_offset + (i * entry_size)], sizeof(FSEntry));
+        entry.name = std::string(reinterpret_cast<const char*>(
+            &file_data[strtab_offset + entry.fs_entry.strtab_offset]));
+        pfs_entries.push_back(std::move(entry));
+    }
+    content_offset = strtab_offset + pfs_header.strtab_size; // file contents follow the strings
+    return Loader::ResultStatus::Success;
+}
+
+u32 PartitionFilesystem::GetNumEntries() const {
+    return pfs_header.num_entries; // entry count as stored in the parsed header
+}
+
+u64 PartitionFilesystem::GetEntryOffset(int index) const {
+    if (index >= GetNumEntries()) // valid indices are 0 .. GetNumEntries() - 1
+        return 0;
+
+    return content_offset + pfs_entries[index].fs_entry.offset; // relative to the loaded data
+}
+
+u64 PartitionFilesystem::GetEntrySize(int index) const {
+    if (index >= GetNumEntries()) // valid indices are 0 .. GetNumEntries() - 1
+        return 0;
+
+    return pfs_entries[index].fs_entry.size;
+}
+
+std::string PartitionFilesystem::GetEntryName(int index) const {
+    if (index >= GetNumEntries()) // valid indices are 0 .. GetNumEntries() - 1
+        return "";
+
+    return pfs_entries[index].name;
+}
+
+u64 PartitionFilesystem::GetFileOffset(const std::string& name) const {
+    for (const auto& entry : pfs_entries) { // walk the stored entries, not the header count
+        if (entry.name == name)
+            return content_offset + entry.fs_entry.offset;
+    }
+
+    return 0; // no entry with that name
+}
+
+u64 PartitionFilesystem::GetFileSize(const std::string& name) const {
+    for (const auto& entry : pfs_entries) { // walk the stored entries, not the header count
+        if (entry.name == name)
+            return entry.fs_entry.size;
+    }
+
+    return 0; // no entry with that name
+}
+
+void PartitionFilesystem::Print() const {
+    NGLOG_DEBUG(Service_FS, "Magic:                  {:.4}", pfs_header.magic.data());
+    NGLOG_DEBUG(Service_FS, "Files:                  {}", pfs_header.num_entries);
+    for (u32 i = 0; i < pfs_header.num_entries; i++) {
+        NGLOG_DEBUG(Service_FS, " > File {}:              {} (0x{:X} bytes, at 0x{:X})", i,
+                    pfs_entries[i].name.c_str(), pfs_entries[i].fs_entry.size,
+                    content_offset + pfs_entries[i].fs_entry.offset); // this entry, no name lookup
+    }
+}
+} // namespace FileSys
--- a/src/core/file_sys/partition_filesystem.h
+++ b/src/core/file_sys/partition_filesystem.h
@@ -0,0 +1,87 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <string>
+#include <vector>
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "common/swap.h"
+
+namespace Loader {
+enum class ResultStatus;
+}
+
+namespace FileSys {
+
+/**
+ * Helper which implements an interface to parse PFS/HFS filesystems.
+ * Data can either be loaded from a file path or data with an offset into it.
+ */
+class PartitionFilesystem {
+public:
+    Loader::ResultStatus Load(const std::string& file_path, size_t offset = 0);
+    Loader::ResultStatus Load(const std::vector<u8>& file_data, size_t offset = 0);
+
+    u32 GetNumEntries() const;
+    u64 GetEntryOffset(int index) const;       // 0 when index is out of range
+    u64 GetEntrySize(int index) const;         // 0 when index is out of range
+    std::string GetEntryName(int index) const; // "" when index is out of range
+    u64 GetFileOffset(const std::string& name) const; // 0 when no entry has that name
+    u64 GetFileSize(const std::string& name) const;   // 0 when no entry has that name
+
+    void Print() const;
+
+private:
+    struct Header {
+        std::array<char, 4> magic;
+        u32_le num_entries;
+        u32_le strtab_size;
+        INSERT_PADDING_BYTES(0x4);
+    };
+
+    static_assert(sizeof(Header) == 0x10, "PFS/HFS header structure size is wrong");
+
+#pragma pack(push, 1)
+    struct FSEntry {
+        u64_le offset;
+        u64_le size;
+        u32_le strtab_offset;
+    };
+
+    static_assert(sizeof(FSEntry) == 0x14, "FS entry structure size is wrong");
+
+    struct PFSEntry {
+        FSEntry fs_entry;
+        INSERT_PADDING_BYTES(0x4);
+    };
+
+    static_assert(sizeof(PFSEntry) == 0x18, "PFS entry structure size is wrong");
+
+    struct HFSEntry {
+        FSEntry fs_entry;
+        u32_le hash_region_size;
+        INSERT_PADDING_BYTES(0x8);
+        std::array<char, 0x20> hash;
+    };
+
+    static_assert(sizeof(HFSEntry) == 0x40, "HFS entry structure size is wrong");
+
+#pragma pack(pop)
+
+    struct FileEntry {
+        FSEntry fs_entry;
+        std::string name;
+    };
+
+    Header pfs_header{};       // zero-initialized so an unloaded object reports 0 entries
+    bool is_hfs = false;       // set by Load() from the header magic
+    size_t content_offset = 0; // set by Load(): first byte after the string table
+
+    std::vector<FileEntry> pfs_entries;
+};
+
+} // namespace FileSys
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -74,8 +74,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {

    regs.reg_array[method] = value;

-#define MAXWELL3D_REG_INDEX(field_name) (offsetof(Regs, field_name) / sizeof(u32))
-
    switch (method) {
    case MAXWELL3D_REG_INDEX(code_address.code_address_high):
    case MAXWELL3D_REG_INDEX(code_address.code_address_low): {
@@ -136,7 +134,7 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
        break;
    }

-#undef MAXWELL3D_REG_INDEX
+    VideoCore::g_renderer->Rasterizer()->NotifyMaxwellRegisterChanged(method);

    if (debug_context) {
        debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
@@ -220,10 +218,8 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
    Texture::TICEntry tic_entry;
    Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));

-    ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear,
-               "TIC versions other than BlockLinear are unimplemented");
-
-    ASSERT_MSG(tic_entry.texture_type == Texture::TextureType::Texture2D,
+    ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) ||
+                   (tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap),
               "Texture types other than Texture2D are unimplemented");

    auto r_type = tic_entry.r_type.Value();
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -20,6 +20,9 @@
 namespace Tegra {
 namespace Engines {

+#define MAXWELL3D_REG_INDEX(field_name)                                                            \
+    (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
+
 class Maxwell3D final {
 public:
    explicit Maxwell3D(MemoryManager& memory_manager);
@@ -254,6 +257,46 @@ public:
            UnsignedInt = 0x2,
        };

+        struct Blend {
+            enum class Equation : u32 {
+                Add = 1,
+                Subtract = 2,
+                ReverseSubtract = 3,
+                Min = 4,
+                Max = 5,
+            };
+
+            enum class Factor : u32 {
+                Zero = 0x1,
+                One = 0x2,
+                SourceColor = 0x3,
+                OneMinusSourceColor = 0x4,
+                SourceAlpha = 0x5,
+                OneMinusSourceAlpha = 0x6,
+                DestAlpha = 0x7,
+                OneMinusDestAlpha = 0x8,
+                DestColor = 0x9,
+                OneMinusDestColor = 0xa,
+                SourceAlphaSaturate = 0xb,
+                Source1Color = 0x10,
+                OneMinusSource1Color = 0x11,
+                Source1Alpha = 0x12,
+                OneMinusSource1Alpha = 0x13,
+                ConstantColor = 0x61,
+                OneMinusConstantColor = 0x62,
+                ConstantAlpha = 0x63,
+                OneMinusConstantAlpha = 0x64,
+            };
+
+            u32 separate_alpha;
+            Equation equation_rgb;
+            Factor factor_source_rgb;
+            Factor factor_dest_rgb;
+            Equation equation_a;
+            Factor factor_source_a;
+            Factor factor_dest_a;
+        };
+
        union {
            struct {
                INSERT_PADDING_WORDS(0x200);
@@ -276,7 +319,14 @@ public:
                    }
                } rt[NumRenderTargets];

-                INSERT_PADDING_WORDS(0x80);
+                f32 viewport_scale_x;
+                f32 viewport_scale_y;
+                f32 viewport_scale_z;
+                u32 viewport_translate_x;
+                u32 viewport_translate_y;
+                u32 viewport_translate_z;
+
+                INSERT_PADDING_WORDS(0x7A);

                struct {
                    union {
@@ -451,7 +501,9 @@ public:
                    }
                } vertex_array[NumVertexArrays];

-                INSERT_PADDING_WORDS(0x40);
+                Blend blend;
+
+                INSERT_PADDING_WORDS(0x39);

                struct {
                    u32 limit_high;
@@ -604,6 +656,12 @@ private:
                  "Field " #field_name " has invalid position")

 ASSERT_REG_POSITION(rt, 0x200);
+ASSERT_REG_POSITION(viewport_scale_x, 0x280);
+ASSERT_REG_POSITION(viewport_scale_y, 0x281);
+ASSERT_REG_POSITION(viewport_scale_z, 0x282);
+ASSERT_REG_POSITION(viewport_translate_x, 0x283);
+ASSERT_REG_POSITION(viewport_translate_y, 0x284);
+ASSERT_REG_POSITION(viewport_translate_z, 0x285);
 ASSERT_REG_POSITION(viewport, 0x300);
 ASSERT_REG_POSITION(vertex_buffer, 0x35D);
 ASSERT_REG_POSITION(zeta, 0x3F8);
@@ -616,6 +674,7 @@ ASSERT_REG_POSITION(draw, 0x585);
 ASSERT_REG_POSITION(index_array, 0x5F2);
 ASSERT_REG_POSITION(query, 0x6C0);
 ASSERT_REG_POSITION(vertex_array[0], 0x700);
+ASSERT_REG_POSITION(blend, 0x780);
 ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
 ASSERT_REG_POSITION(shader_config[0], 0x800);
 ASSERT_REG_POSITION(const_buffer, 0x8E0);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -4,6 +4,7 @@

 #pragma once

+#include <cstring>
 #include <map>
 #include <string>
 #include "common/bit_field.h"
@@ -12,14 +13,10 @@ namespace Tegra {
 namespace Shader {

 struct Register {
-    Register() = default;
+    constexpr Register() = default;

    constexpr Register(u64 value) : value(value) {}

-    constexpr u64 GetIndex() const {
-        return value;
-    }
-
    constexpr operator u64() const {
        return value;
    }
@@ -43,13 +40,13 @@ struct Register {
    }

 private:
-    u64 value;
+    u64 value{};
 };

 union Attribute {
    Attribute() = default;

-    constexpr Attribute(u64 value) : value(value) {}
+    constexpr explicit Attribute(u64 value) : value(value) {}

    enum class Index : u64 {
        Position = 7,
@@ -68,7 +65,20 @@ union Attribute {
    } fmt28;

    BitField<39, 8, u64> reg;
-    u64 value;
+    u64 value{};
+};
+
+union Sampler {
+    Sampler() = default;
+
+    constexpr explicit Sampler(u64 value) : value(value) {}
+
+    enum class Index : u64 {
+        Sampler_0 = 8,
+    };
+
+    BitField<36, 13, Index> index;
+    u64 value{};
 };

 union Uniform {
@@ -238,7 +248,7 @@ union OpCode {
    BitField<55, 9, Id> op3;
    BitField<52, 12, Id> op4;
    BitField<51, 13, Id> op5;
-    u64 value;
+    u64 value{};
 };
 static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size");

@@ -280,6 +290,7 @@ enum class SubOp : u64 {
    Lg2 = 0x3,
    Rcp = 0x4,
    Rsq = 0x5,
+    Min = 0x8,
 };

 union Instruction {
@@ -295,15 +306,25 @@ union Instruction {
    BitField<20, 8, Register> gpr20;
    BitField<20, 7, SubOp> sub_op;
    BitField<28, 8, Register> gpr28;
-    BitField<36, 13, u64> imm36;
    BitField<39, 8, Register> gpr39;

    union {
+        BitField<20, 19, u64> imm20;
        BitField<45, 1, u64> negate_b;
        BitField<46, 1, u64> abs_a;
        BitField<48, 1, u64> negate_a;
        BitField<49, 1, u64> abs_b;
        BitField<50, 1, u64> abs_d;
+        BitField<56, 1, u64> negate_imm;
+
+        float GetImm20() const {
+            float result{};
+            u32 imm{static_cast<u32>(imm20)};
+            imm <<= 12;
+            imm |= negate_imm ? 0x80000000 : 0;
+            std::memcpy(&result, &imm, sizeof(imm));
+            return result;
+        }
    } alu;

    union {
@@ -311,11 +332,13 @@ union Instruction {
        BitField<49, 1, u64> negate_c;
    } ffma;

+    BitField<61, 1, u64> is_b_imm;
    BitField<60, 1, u64> is_b_gpr;
    BitField<59, 1, u64> is_c_gpr;

    Attribute attribute;
    Uniform uniform;
+    Sampler sampler;

    u64 hex;
 };
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -15,7 +15,10 @@ namespace Tegra {

 enum class RenderTargetFormat : u32 {
    NONE = 0x0,
+    RGBA16_FLOAT = 0xCA,
+    RGB10_A2_UNORM = 0xD1,
    RGBA8_UNORM = 0xD5,
+    RGBA8_SRGB = 0xD6,
 };

 class DebugContext;
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -19,7 +19,7 @@ public:
    virtual void DrawArrays() = 0;

    /// Notify rasterizer that the specified Maxwell register has been changed
-    virtual void NotifyMaxwellRegisterChanged(u32 id) = 0;
+    virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;

    /// Notify rasterizer that all caches should be flushed to Switch memory
    virtual void FlushAll() = 0;
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -446,7 +446,32 @@ void RasterizerOpenGL::BindTextures() {
    }
 }

-void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {}
+void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) { // method: register index
+    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
+    switch (method) { // NOTE(review): System() above looks like a temporary - confirm intent
+    case MAXWELL3D_REG_INDEX(blend.separate_alpha):
+        ASSERT_MSG(false, "unimplemented"); // fires on every write to this register
+        break;
+    case MAXWELL3D_REG_INDEX(blend.equation_rgb):
+        state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_source_rgb):
+        state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_dest_rgb):
+        state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.equation_a):
+        state.blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_source_a):
+        state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_dest_a):
+        state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a);
+        break;
+    } // registers outside the blend block are ignored here
+}

 void RasterizerOpenGL::FlushAll() {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
@@ -498,7 +523,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu
    src_params.width = std::min(framebuffer.width, pixel_stride);
    src_params.height = framebuffer.height;
    src_params.stride = pixel_stride;
-    src_params.is_tiled = false;
+    src_params.is_tiled = true;
    src_params.pixel_format =
        SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
    src_params.UpdateParams();
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -32,7 +32,7 @@ public:
    ~RasterizerOpenGL() override;

    void DrawArrays() override;
-    void NotifyMaxwellRegisterChanged(u32 id) override;
+    void NotifyMaxwellRegisterChanged(u32 method) override;
    void FlushAll() override;
    void FlushRegion(VAddr addr, u64 size) override;
    void InvalidateRegion(VAddr addr, u64 size) override;
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -20,6 +20,7 @@
 #include "common/math_util.h"
 #include "common/microprofile.h"
 #include "common/scope_exit.h"
+#include "common/swap.h"
 #include "common/vector_math.h"
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
@@ -51,9 +52,14 @@ static constexpr std::array<FormatTuple, 1> fb_format_tuples = {{
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8
 }};

-static constexpr std::array<FormatTuple, 2> tex_format_tuples = {{
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1},                       // RGBA8
-    {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1
+static constexpr std::array<FormatTuple, 7> tex_format_tuples = {{
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1},                         // RGBA8
+    {GL_RGB5_A1, GL_RGB, GL_UNSIGNED_SHORT_5_5_5_1, false, 1},                      // RGB5A1
+    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, false, 1},                         // RGB565
+    {GL_R11F_G11F_B10F, GL_RGB, GL_FLOAT, false, 1},                                // R11FG11FB10F
+    {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16},   // BC1
+    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // BC2
+    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // BC3
 }};

 static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
@@ -85,24 +91,8 @@ static u16 GetResolutionScaleFactor() {
 }

 template <bool morton_to_gl, PixelFormat format>
-static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) {
-    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
-    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
-    for (u32 y = 0; y < 8; ++y) {
-        for (u32 x = 0; x < 8; ++x) {
-            u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
-            u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel;
-            if (morton_to_gl) {
-                std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel);
-            } else {
-                std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel);
-            }
-        }
-    }
-}
-
-template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) {
+void MortonCopy(u32 stride, u32 height, u32 block_height, u8* gl_buffer, VAddr base, VAddr start,
+                VAddr end) {
    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);

@@ -114,27 +104,67 @@ void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start,
 }

 template <>
-void MortonCopy<true, PixelFormat::DXT1>(u32 stride, u32 height, u8* gl_buffer, VAddr base,
-                                         VAddr start, VAddr end) {
-    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::DXT1) / 8;
-    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::DXT1);
+void MortonCopy<true, PixelFormat::BC1>(u32 stride, u32 height, u32 block_height, u8* gl_buffer,
+                                        VAddr base, VAddr start, VAddr end) {
+    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::BC1) / 8;
+    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::BC1);

    // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
    // configuration for this and perform more generic un/swizzle
    LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
-    auto data =
-        Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::DXT1, stride, height);
+    auto data = Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::BC1, stride,
+                                                 height, block_height);
    std::memcpy(gl_buffer, data.data(), data.size());
 }

-static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> morton_to_gl_fns = {
-    MortonCopy<true, PixelFormat::RGBA8>,
-    MortonCopy<true, PixelFormat::DXT1>,
+template <>
+void MortonCopy<true, PixelFormat::BC2>(u32 stride, u32 height, u32 block_height, u8* gl_buffer,
+                                        VAddr base, VAddr start, VAddr end) {
+    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::BC2) / 8;
+    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::BC2);
+
+    // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
+    // configuration for this and perform more generic un/swizzle
+    LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
+    auto data = Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::BC2, stride,
+                                                 height, block_height);
+    std::memcpy(gl_buffer, data.data(), data.size());
+}
+
+template <>
+void MortonCopy<true, PixelFormat::BC3>(u32 stride, u32 height, u32 block_height, u8* gl_buffer,
+                                        VAddr base, VAddr start, VAddr end) {
+    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::BC3) / 8;
+    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::BC3);
+
+    // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
+    // configuration for this and perform more generic un/swizzle
+    // NGLOG_CRITICAL(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
+    auto data = Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::BC3, stride,
+                                                 height, block_height);
+    std::memcpy(gl_buffer, data.data(), data.size());
+}
+
+static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), 7> morton_to_gl_fns =
+    {
+        MortonCopy<true, PixelFormat::RGBA8>,     // RGBA8
+        MortonCopy<true, PixelFormat::RGB5A1>,    // RGB5A1
+        MortonCopy<true, PixelFormat::RGB565>,    // RGB565
+        MortonCopy<true, PixelFormat::RG11FB10F>, // RG11FB10F
+        MortonCopy<true, PixelFormat::BC1>,       // BC1
+        MortonCopy<true, PixelFormat::BC2>,       // BC2
+        MortonCopy<true, PixelFormat::BC3>,       // BC3
 };

-static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> gl_to_morton_fns = {
-    MortonCopy<false, PixelFormat::RGBA8>,
-    MortonCopy<false, PixelFormat::DXT1>,
+static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), 7> gl_to_morton_fns =
+    {
+        MortonCopy<false, PixelFormat::RGBA8>,     // RGBA8
+        MortonCopy<false, PixelFormat::RGB5A1>,    // RGB5A1
+        MortonCopy<false, PixelFormat::RGB565>,    // RGB565
+        MortonCopy<false, PixelFormat::RG11FB10F>, // RG11FB10F
+        MortonCopy<false, PixelFormat::BC1>,       // BC1
+        MortonCopy<false, PixelFormat::BC2>,       // BC2
+        MortonCopy<false, PixelFormat::BC3>,       // BC3
 };

 // Allocate an uninitialized texture of appropriate size and format for the surface
@@ -483,16 +513,18 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) {
    if (!is_tiled) {
        ASSERT(type == SurfaceType::Color);
        const u32 bytes_per_pixel{GetFormatBpp() >> 3};
+        std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
+                    bytes_per_pixel * width * height);
+
+        // TODO(bunnei): HACK HACK HACK - Remove before checkin!
+        u32* gl_words = reinterpret_cast<u32*>(&gl_buffer[start_offset]);
+        for (unsigned index = 0; index < width * height; ++index) {
+            gl_words[index] = Common::swap32(gl_words[index]);
+        }

-        // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
-        // the configuration for this and perform more generic un/swizzle
-        LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
-        VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4,
-                                       texture_src_data + start_offset, &gl_buffer[start_offset],
-                                       true);
    } else {
-        morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
-                                                            load_start, load_end);
+        morton_to_gl_fns[static_cast<size_t>(pixel_format)](
+            stride, height, block_height, &gl_buffer[0], addr, load_start, load_end);
    }
 }

@@ -536,8 +568,8 @@ void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) {
        ASSERT(type == SurfaceType::Color);
        std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start);
    } else {
-        gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
-                                                            flush_start, flush_end);
+        gl_to_morton_fns[static_cast<size_t>(pixel_format)](
+            stride, height, block_height, &gl_buffer[0], addr, flush_start, flush_end);
    }
 }

@@ -1040,6 +1072,7 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
    params.width = config.tic.Width();
    params.height = config.tic.Height();
    params.is_tiled = config.tic.IsTiled();
+    params.block_height = config.tic.BlockHeight();
    params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
    params.UpdateParams();

--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -52,8 +52,17 @@ enum class ScaleMatch {

 struct SurfaceParams {
    enum class PixelFormat {
+        // Texture and color buffer formats
        RGBA8 = 0,
-        DXT1 = 1,
+        RGB5A1 = 1,
+        RGB565 = 2,
+        RG11FB10F = 3,
+
+        // Compressed Texture formats
+        BC1 = 4,
+        BC2 = 5,
+        BC3 = 6,
+
        Invalid = 255,
    };

@@ -70,9 +79,14 @@ struct SurfaceParams {
        if (format == PixelFormat::Invalid)
            return 0;

-        constexpr std::array<unsigned int, 2> bpp_table = {
-            32, // RGBA8
-            64, // DXT1
+        constexpr std::array<unsigned int, 7> bpp_table = {
+            32,  // RGBA8
+            16,  // RGB5A1
+            16,  // RGB565
+            32,  // RG11FB10F
+            64,  // BC1
+            128, // BC2
+            128, // BC3
        };

        ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -87,6 +101,7 @@ struct SurfaceParams {
        case Tegra::RenderTargetFormat::RGBA8_UNORM:
            return PixelFormat::RGBA8;
        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
            UNREACHABLE();
        }
    }
@@ -96,6 +111,7 @@ struct SurfaceParams {
        case Tegra::FramebufferConfig::PixelFormat::ABGR8:
            return PixelFormat::RGBA8;
        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
            UNREACHABLE();
        }
    }
@@ -105,9 +121,14 @@ struct SurfaceParams {
        switch (format) {
        case Tegra::Texture::TextureFormat::A8R8G8B8:
            return PixelFormat::RGBA8;
-        case Tegra::Texture::TextureFormat::DXT1:
-            return PixelFormat::DXT1;
+        case Tegra::Texture::TextureFormat::BC1:
+            return PixelFormat::BC1;
+        case Tegra::Texture::TextureFormat::BC2:
+            return PixelFormat::BC2;
+        case Tegra::Texture::TextureFormat::BC3:
+            return PixelFormat::BC3;
        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
            UNREACHABLE();
        }
    }
@@ -137,7 +158,7 @@ struct SurfaceParams {
            return SurfaceType::Color;
        }

-        if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::DXT1)) {
+        if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::BC3)) {
            return SurfaceType::Texture;
        }

@@ -210,9 +231,10 @@ struct SurfaceParams {
    u32 width = 0;
    u32 height = 0;
    u32 stride = 0;
+    u32 block_height = 0;
    u16 res_scale = 1;

-    bool is_tiled = false;
+    bool is_tiled = true;
    PixelFormat pixel_format = PixelFormat::Invalid;
    SurfaceType type = SurfaceType::Invalid;
 };
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -17,6 +17,7 @@ using Tegra::Shader::Attribute;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
+using Tegra::Shader::Sampler;
 using Tegra::Shader::SubOp;
 using Tegra::Shader::Uniform;

@@ -155,23 +156,27 @@ private:

    /// Generates code representing an input attribute register.
    std::string GetInputAttribute(Attribute::Index attribute) {
-        declr_input_attribute.insert(attribute);
+        switch (attribute) {
+        case Attribute::Index::Position:
+            return "position";
+        default:
+            const u32 index{static_cast<u32>(attribute) -
+                            static_cast<u32>(Attribute::Index::Attribute_0)};
+            if (attribute >= Attribute::Index::Attribute_0) {
+                declr_input_attribute.insert(attribute);
+                return "input_attribute_" + std::to_string(index);
+            }

-        const u32 index{static_cast<u32>(attribute) -
-                        static_cast<u32>(Attribute::Index::Attribute_0)};
-        if (attribute >= Attribute::Index::Attribute_0) {
-            return "input_attribute_" + std::to_string(index);
+            NGLOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index);
+            UNREACHABLE();
        }
-
-        LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index);
-        UNREACHABLE();
    }

    /// Generates code representing an output attribute register.
    std::string GetOutputAttribute(Attribute::Index attribute) {
        switch (attribute) {
        case Attribute::Index::Position:
-            return "gl_Position";
+            return "position";
        default:
            const u32 index{static_cast<u32>(attribute) -
                            static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -180,22 +185,42 @@ private:
                return "output_attribute_" + std::to_string(index);
            }

-            LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index);
+            NGLOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index);
            UNREACHABLE();
        }
    }

+    /// Generates code representing an immediate value
+    static std::string GetImmediate(const Instruction& instr) {
+        return std::to_string(instr.alu.GetImm20());
+    }
+
    /// Generates code representing a temporary (GPR) register.
-    std::string GetRegister(const Register& reg) {
-        return *declr_register.insert("register_" + std::to_string(reg)).first;
+    std::string GetRegister(const Register& reg, unsigned elem = 0) {
+        if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg + elem < 4) {
+            // Only GPRs 0-3 alias the output color, so test the effective register (reg + elem)
+            return std::string{"color."} + "rgba"[reg + elem];
+        }
+
+        return *declr_register.insert("register_" + std::to_string(reg + elem)).first;
    }

    /// Generates code representing a uniform (C buffer) register.
    std::string GetUniform(const Uniform& reg) {
-        declr_const_buffers[reg.index].MarkAsUsed(reg.index, reg.offset, stage);
+        declr_const_buffers[reg.index].MarkAsUsed(static_cast<unsigned>(reg.index),
+                                                  static_cast<unsigned>(reg.offset), stage);
        return 'c' + std::to_string(reg.index) + '[' + std::to_string(reg.offset) + ']';
    }

+    /// Generates code representing a texture sampler.
+    std::string GetSampler(const Sampler& sampler) const {
+        // TODO(Subv): Support more than just texture sampler 0
+        ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported");
+        const unsigned index{static_cast<unsigned>(sampler.index.Value()) -
+                             static_cast<unsigned>(Sampler::Index::Sampler_0)};
+        return "tex[" + std::to_string(index) + "]";
+    }
+
    /**
     * Adds code that calls a subroutine.
     * @param subroutine the subroutine to call.
@@ -217,12 +242,13 @@ private:
     * @param value the code representing the value to assign.
     */
    void SetDest(u64 elem, const std::string& reg, const std::string& value,
-                 u64 dest_num_components, u64 value_num_components) {
+                 u64 dest_num_components, u64 value_num_components, bool is_abs = false) {
        std::string swizzle = ".";
        swizzle += "xyzw"[elem];

        std::string dest = reg + (dest_num_components != 1 ? swizzle : "");
        std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : "");
+        src = is_abs ? "abs(" + src + ")" : src;

        shader.AddLine(dest + " = " + src + ";");
    }
@@ -240,8 +266,6 @@ private:

        switch (OpCode::GetInfo(instr.opcode).type) {
        case OpCode::Type::Arithmetic: {
-            ASSERT(!instr.alu.abs_d);
-
            std::string dest = GetRegister(instr.gpr0);
            std::string op_a = instr.alu.negate_a ? "-" : "";
            op_a += GetRegister(instr.gpr8);
@@ -250,63 +274,109 @@ private:
            }

            std::string op_b = instr.alu.negate_b ? "-" : "";
-            if (instr.is_b_gpr) {
-                op_b += GetRegister(instr.gpr20);
+
+            if (instr.is_b_imm) {
+                op_b += GetImmediate(instr);
            } else {
-                op_b += GetUniform(instr.uniform);
+                if (instr.is_b_gpr) {
+                    op_b += GetRegister(instr.gpr20);
+                } else {
+                    op_b += GetUniform(instr.uniform);
+                }
            }
+
            if (instr.alu.abs_b) {
                op_b = "abs(" + op_b + ")";
            }

            switch (instr.opcode.EffectiveOpCode()) {
            case OpCode::Id::FMUL_C:
-            case OpCode::Id::FMUL_R: {
-                SetDest(0, dest, op_a + " * " + op_b, 1, 1);
+            case OpCode::Id::FMUL_R:
+            case OpCode::Id::FMUL_IMM: {
+                SetDest(0, dest, op_a + " * " + op_b, 1, 1, instr.alu.abs_d);
                break;
            }
            case OpCode::Id::FADD_C:
-            case OpCode::Id::FADD_R: {
-                SetDest(0, dest, op_a + " + " + op_b, 1, 1);
+            case OpCode::Id::FADD_R:
+            case OpCode::Id::FADD_IMM: {
+                SetDest(0, dest, op_a + " + " + op_b, 1, 1, instr.alu.abs_d);
+                break;
+            }
+            case OpCode::Id::MUFU: {
+                switch (instr.sub_op) {
+                case SubOp::Cos:
+                    SetDest(0, dest, "cos(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Sin:
+                    SetDest(0, dest, "sin(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Ex2:
+                    SetDest(0, dest, "exp2(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Lg2:
+                    SetDest(0, dest, "log2(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Rcp:
+                    SetDest(0, dest, "1.0 / " + op_a, 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Rsq:
+                    SetDest(0, dest, "inversesqrt(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Min:
+                    SetDest(0, dest, "min(" + op_a + "," + op_b + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                default:
+                    NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {}",
+                                   static_cast<unsigned>(instr.sub_op.Value()));
+                    UNREACHABLE();
+                }
                break;
            }
            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
-                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
-                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
-                throw DecompileFail("Unhandled instruction");
-                break;
+                NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                // UNREACHABLE();
            }
            }
            break;
        }
        case OpCode::Type::Ffma: {
-            ASSERT_MSG(!instr.ffma.negate_b, "untested");
-            ASSERT_MSG(!instr.ffma.negate_c, "untested");
-
            std::string dest = GetRegister(instr.gpr0);
            std::string op_a = GetRegister(instr.gpr8);
-
            std::string op_b = instr.ffma.negate_b ? "-" : "";
-            op_b += GetUniform(instr.uniform);
-
            std::string op_c = instr.ffma.negate_c ? "-" : "";
-            op_c += GetRegister(instr.gpr39);

            switch (instr.opcode.EffectiveOpCode()) {
            case OpCode::Id::FFMA_CR: {
-                SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1);
+                op_b += GetUniform(instr.uniform);
+                op_c += GetRegister(instr.gpr39);
                break;
            }
+            case OpCode::Id::FFMA_RR: {
+                op_b += GetRegister(instr.gpr20);
+                op_c += GetRegister(instr.gpr39);
+                break;
+            }
+            case OpCode::Id::FFMA_RC: {
+                op_b += GetRegister(instr.gpr39);
+                op_c += GetUniform(instr.uniform);
+                break;
+            }
+            case OpCode::Id::FFMA_IMM: {
+                op_b += GetImmediate(instr);
+                op_c += GetRegister(instr.gpr39);
+                break;
+            }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
+            }
+            }

-            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x",
-                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
-                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
-                throw DecompileFail("Unhandled instruction");
-                break;
-            }
-            }
+            SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1);
            break;
        }
        case OpCode::Type::Memory: {
@@ -315,22 +385,33 @@ private:

            switch (instr.opcode.EffectiveOpCode()) {
            case OpCode::Id::LD_A: {
-                ASSERT(instr.attribute.fmt20.size == 0);
+                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
                SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4);
                break;
            }
            case OpCode::Id::ST_A: {
-                ASSERT(instr.attribute.fmt20.size == 0);
+                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
                SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1);
                break;
            }
-            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x",
-                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
-                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
-                throw DecompileFail("Unhandled instruction");
+            case OpCode::Id::TEXS: {
+                ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
+                const std::string op_a = GetRegister(instr.gpr8);
+                const std::string op_b = GetRegister(instr.gpr20);
+                const std::string sampler = GetSampler(instr.sampler);
+                const std::string coord = "vec2(" + op_a + ", " + op_b + ")";
+                const std::string texture = "texture(" + sampler + ", " + coord + ")";
+                for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
+                    SetDest(elem, GetRegister(instr.gpr0, elem), texture, 1, 4);
+                }
                break;
            }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
+            }
            }
            break;
        }
@@ -342,14 +423,18 @@ private:
                offset = PROGRAM_END - 1;
                break;
            }
-
-            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
-                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
-                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
-                throw DecompileFail("Unhandled instruction");
+            case OpCode::Id::IPA: {
+                const auto& attribute = instr.attribute.fmt28;
+                std::string dest = GetRegister(instr.gpr0);
+                SetDest(attribute.element, dest, GetInputAttribute(attribute.index), 1, 4);
                break;
            }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                // UNREACHABLE();
+            }
            }

            break;
@@ -514,7 +599,7 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
        GLSLGenerator generator(subroutines, program_code, main_offset, stage);
        return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
    } catch (const DecompileFail& exception) {
-        LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what());
+        NGLOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
    }
    return boost::none;
 }
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -27,10 +27,19 @@ out gl_PerVertex {
    vec4 gl_Position;
 };

+out vec4 position;
+
+layout (std140) uniform vs_config {
+    vec4 viewport_flip;
+};
+
 void main() {
    exec_shader();
-}

+    // Viewport can be flipped, which is unsupported by glViewport
+    position.xy *= viewport_flip.xy;
+    gl_Position = position;
+}
 )";
    out += program.first;
    return {out, program.second};
@@ -46,8 +55,13 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo
                                .get_value_or({});
    out += R"(

+in vec4 position;
 out vec4 color;

+layout (std140) uniform fs_config {
+    vec4 viewport_flip;
+};
+
 uniform sampler2D tex[32];

 void main() {
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -53,6 +53,11 @@ void SetShaderSamplerBindings(GLuint shader) {

 } // namespace Impl

-void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {}
+void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
+    // Store -1 for an axis whose viewport scale is negative (flipped), +1 otherwise.
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    viewport_flip[0] = regs.viewport_scale_x < 0.0f ? -1.0f : 1.0f;
+    viewport_flip[1] = regs.viewport_scale_y < 0.0f ? -1.0f : 1.0f;
+}

 } // namespace GLShader
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -30,10 +30,9 @@ void SetShaderSamplerBindings(GLuint shader);
 //       Not following that rule will cause problems on some AMD drivers.
 struct MaxwellUniformData {
    void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
-    // TODO(Subv): Use this for something.
+    alignas(16) GLvec4 viewport_flip;
 };
-// static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is
-// incorrect");
+static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
 static_assert(sizeof(MaxwellUniformData) < 16384,
              "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");

--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -102,4 +102,68 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
    return {};
 }

+inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
+    switch (equation) {
+    case Maxwell::Blend::Equation::Add:
+        return GL_FUNC_ADD;
+    case Maxwell::Blend::Equation::Subtract:
+        return GL_FUNC_SUBTRACT;
+    case Maxwell::Blend::Equation::ReverseSubtract:
+        return GL_FUNC_REVERSE_SUBTRACT;
+    case Maxwell::Blend::Equation::Min:
+        return GL_MIN;
+    case Maxwell::Blend::Equation::Max:
+        return GL_MAX;
+    }
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
+    UNREACHABLE();
+    return {};
+}
+
+inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
+    switch (factor) {
+    case Maxwell::Blend::Factor::Zero:
+        return GL_ZERO;
+    case Maxwell::Blend::Factor::One:
+        return GL_ONE;
+    case Maxwell::Blend::Factor::SourceColor:
+        return GL_SRC_COLOR;
+    case Maxwell::Blend::Factor::OneMinusSourceColor:
+        return GL_ONE_MINUS_SRC_COLOR;
+    case Maxwell::Blend::Factor::SourceAlpha:
+        return GL_SRC_ALPHA;
+    case Maxwell::Blend::Factor::OneMinusSourceAlpha:
+        return GL_ONE_MINUS_SRC_ALPHA;
+    case Maxwell::Blend::Factor::DestAlpha:
+        return GL_DST_ALPHA;
+    case Maxwell::Blend::Factor::OneMinusDestAlpha:
+        return GL_ONE_MINUS_DST_ALPHA;
+    case Maxwell::Blend::Factor::DestColor:
+        return GL_DST_COLOR;
+    case Maxwell::Blend::Factor::OneMinusDestColor:
+        return GL_ONE_MINUS_DST_COLOR;
+    case Maxwell::Blend::Factor::SourceAlphaSaturate:
+        return GL_SRC_ALPHA_SATURATE;
+    case Maxwell::Blend::Factor::Source1Color:
+        return GL_SRC1_COLOR;
+    case Maxwell::Blend::Factor::OneMinusSource1Color:
+        return GL_ONE_MINUS_SRC1_COLOR;
+    case Maxwell::Blend::Factor::Source1Alpha:
+        return GL_SRC1_ALPHA;
+    case Maxwell::Blend::Factor::OneMinusSource1Alpha:
+        return GL_ONE_MINUS_SRC1_ALPHA;
+    case Maxwell::Blend::Factor::ConstantColor:
+        return GL_CONSTANT_COLOR;
+    case Maxwell::Blend::Factor::OneMinusConstantColor:
+        return GL_ONE_MINUS_CONSTANT_COLOR;
+    case Maxwell::Blend::Factor::ConstantAlpha:
+        return GL_CONSTANT_ALPHA;
+    case Maxwell::Blend::Factor::OneMinusConstantAlpha:
+        return GL_ONE_MINUS_CONSTANT_ALPHA;
+    }
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
+    UNREACHABLE();
+    return {};
+}
+
 } // namespace MaxwellToGL
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -45,18 +45,26 @@ static void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out

 u32 BytesPerPixel(TextureFormat format) {
    switch (format) {
-    case TextureFormat::DXT1:
-        // In this case a 'pixel' actually refers to a 4x4 tile.
-        return 8;
    case TextureFormat::A8R8G8B8:
+    case TextureFormat::BF10GF11RF11:
        return 4;
+    case TextureFormat::A1B5G5R5:
+    case TextureFormat::B5G6R5:
+        return 2;
+    // For the block-compressed formats below a 'pixel' actually refers to a 4x4 tile.
+    case TextureFormat::BC1:
+        return 8;
+    case TextureFormat::BC2:
+    case TextureFormat::BC3:
+        return 16;
    default:
        UNIMPLEMENTED_MSG("Format not implemented");
-        break;
+        return 0; // Every path must return; flowing off the end of a non-void function is UB.
    }
 }

-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) {
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
+                                 u32 block_height) {
    u8* data = Memory::GetPointer(address);
    u32 bytes_per_pixel = BytesPerPixel(format);

@@ -65,11 +73,21 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
    std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);

    switch (format) {
-    case TextureFormat::DXT1:
-        // In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values.
+    case TextureFormat::BC1:
+        // In the BC1 format, each 4x4 tile is swizzled instead of just individual pixel values.
        CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
                         unswizzled_data.data(), true, DefaultBlockHeight);
        break;
+    case TextureFormat::BC2:
+        // TODO
+        CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
+                         unswizzled_data.data(), true, DefaultBlockHeight);
+        break;
+    case TextureFormat::BC3:
+        // TODO
+        CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
+                         unswizzled_data.data(), true, block_height);
+        break;
    case TextureFormat::A8R8G8B8:
        CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
                         unswizzled_data.data(), true, DefaultBlockHeight);
@@ -88,8 +106,13 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat

    // TODO(Subv): Implement.
    switch (format) {
-    case TextureFormat::DXT1:
+    case TextureFormat::BC1:
+    case TextureFormat::BC2:
+    case TextureFormat::BC3:
    case TextureFormat::A8R8G8B8:
+    case TextureFormat::A1B5G5R5:
+    case TextureFormat::B5G6R5:
+    case TextureFormat::BF10GF11RF11:
        // TODO(Subv): For the time being just forward the same data without any decoding.
        rgba_data = texture_data;
        break;
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -14,7 +14,8 @@ namespace Texture {
 /**
 * Unswizzles a swizzled texture without changing its format.
 */
-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height);
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
+                                 u32 block_height);

 /**
 * Decodes an unswizzled texture into a A8R8G8B8 texture.
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -13,8 +13,14 @@ namespace Tegra {
 namespace Texture {

 enum class TextureFormat : u32 {
-    A8R8G8B8 = 8,
-    DXT1 = 0x24,
+    A8R8G8B8 = 0x8,
+    A1B5G5R5 = 0x14,
+    B5G6R5 = 0x15,
+    BF10GF11RF11 = 0x21,
+    // Compressed Textures
+    BC1 = 0x24,
+    BC2 = 0x25,
+    BC3 = 0x26,
 };

 enum class TextureType : u32 {
@@ -68,7 +74,10 @@ struct TICEntry {
        BitField<0, 16, u32> address_high;
        BitField<21, 3, TICHeaderVersion> header_version;
    };
-    INSERT_PADDING_BYTES(4);
+    union {
+        BitField<3, 3, u8> gobs_per_block;
+    };
+    INSERT_PADDING_BYTES(3);
    union {
        BitField<0, 16, u32> width_minus_1;
        BitField<23, 4, TextureType> texture_type;
@@ -92,6 +101,10 @@ struct TICEntry {
        return header_version == TICHeaderVersion::BlockLinear ||
               header_version == TICHeaderVersion::BlockLinearColorKey;
    }
+
+    u32 BlockHeight() const {
+        return 1 << gobs_per_block;
+    }
 };
 static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");

--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -378,8 +378,8 @@ void GraphicsSurfaceWidget::OnUpdate() {
    QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32);
    VAddr address = gpu.memory_manager->PhysicalToVirtualAddress(surface_address);

-    auto unswizzled_data =
-        Tegra::Texture::UnswizzleTexture(address, surface_format, surface_width, surface_height);
+    auto unswizzled_data = Tegra::Texture::UnswizzleTexture(address, surface_format, surface_width,
+                                                            surface_height, 16);

    auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format,
                                                      surface_width, surface_height);
Author	SHA1	Message	Date
bunnei	8bff67e0c1	gl_shader_decompiler: HACK: Remove some unreachables so games run.	2018-04-17 22:42:07 -04:00
bunnei	c71b78f3bc	gl_rasterizer_cache: HACK: Pre-swap raw textures before uploading them. - Temporary fix for Cave Story, will be removed before merging.	2018-04-17 22:25:30 -04:00
bunnei	24a47f6e18	gl_shader_gen: Support vertical/horizontal viewport flipping.	2018-04-17 22:25:29 -04:00
bunnei	e4d3f578d1	renderer_opengl: Support unswizzled textures.	2018-04-17 22:23:14 -04:00
bunnei	185556025e	(jroweboy) textures: Add support for other formats.	2018-04-17 22:21:44 -04:00
bunnei	c93ea96366	Merge pull request #346 from bunnei/misc-gpu-improvements Misc gpu improvements	2018-04-17 22:17:07 -04:00
bunnei	71b4a3b9f6	Merge pull request #344 from bunnei/shader-decompiler-p2 Shader decompiler changes part 2	2018-04-17 22:10:53 -04:00
bunnei	9dc0d13ba5	Merge pull request #345 from bunnei/blending renderer_opengl: Implement BlendEquation and BlendFunc.	2018-04-17 21:45:36 -04:00
bunnei	7222d9a4c3	gl_rasterizer_cache: Add missing LOG statements.	2018-04-17 21:44:36 -04:00
bunnei	9df8e924fb	texture: Add missing formats.	2018-04-17 21:41:36 -04:00
bunnei	3ed8a1cac7	gpu: Add several framebuffer formats to RenderTargetFormat.	2018-04-17 21:40:38 -04:00
bunnei	4a8eb6745e	maxwell3d: Allow Texture2DNoMipmap as Texture2D.	2018-04-17 21:39:15 -04:00
bunnei	531c25386e	shader_bytecode: Make ctor's constexpr and explicit.	2018-04-17 21:27:07 -04:00
bunnei	174cba5c58	renderer_opengl: Implement BlendEquation and BlendFunc.	2018-04-17 18:11:48 -04:00
bunnei	e59126809c	bit_field: Remove is_pod check, add is_trivially_copyable_v.	2018-04-17 18:00:18 -04:00
bunnei	1f6fe062ca	gl_shader_decompiler: Fix warnings with MarkAsUsed.	2018-04-17 16:36:44 -04:00
bunnei	ed542a7309	gl_shader_decompiler: Cleanup logging, updating to NGLOG_*.	2018-04-17 16:36:44 -04:00
bunnei	ef2d5ab0c1	gl_shader_decompiler: Implement several MUFU subops and abs_d.	2018-04-17 16:36:43 -04:00
bunnei	59f4ff4659	gl_shader_decompiler: Fix swizzle in GetRegister.	2018-04-17 16:36:42 -04:00
bunnei	5a28dce9eb	gl_shader_decompiler: Implement FMUL/FADD/FFMA immediate instructions.	2018-04-17 16:36:42 -04:00
bunnei	8d4899d6ea	gl_shader_decompiler: Allow vertex position to be used in fragment shader.	2018-04-17 16:36:40 -04:00
bunnei	95144cc39c	gl_shader_decompiler: Implement IPA instruction.	2018-04-17 16:36:39 -04:00
bunnei	8b4443c966	gl_shader_decompiler: Add support for TEXS instruction.	2018-04-17 16:36:38 -04:00
bunnei	5ba71369ac	gl_shader_decompiler: Use fragment output color for GPR 0-3.	2018-04-17 15:25:54 -04:00
bunnei	5d529698c9	gl_shader_decompiler: Partially implement MUFU.	2018-04-17 15:25:54 -04:00
bunnei	5b9bcbf438	Merge pull request #341 from shinyquagsire23/pfs-hfs-impl file_sys: Add HFS/PFS helper component	2018-04-17 14:39:20 -04:00
shinyquagsire23	de580ccdd5	file_sys: Use NGLOG	2018-04-17 09:55:29 -06:00
shinyquagsire23	83aa38b239	file_sys: tweaks	2018-04-16 06:51:59 -06:00
shinyquagsire23	c03795300a	file_sys: Add HFS/PFS helper component	2018-04-16 04:36:25 -06:00