GPU: Allow using the old NV04 values for the depth test function.

These seem to be just as valid as the GL token values. Thanks @ReinUsesLisp. This restores graphical output to Disgaea 5.
Merge pull request #624 from Subv/f2f_round
2018-07-05 13:01:31 -05:00 · 2018-07-05 11:30:29 -04:00 · 2018-07-05 11:30:01 -04:00 · 2018-07-05 11:29:17 -04:00 · 2018-07-05 11:28:50 -04:00 · 2018-07-05 11:09:15 -04:00
14 changed files with 455 additions and 53 deletions
--- a/CMakeModules/CopyYuzuQt5Deps.cmake
+++ b/CMakeModules/CopyYuzuQt5Deps.cmake
@@ -3,7 +3,9 @@ function(copy_yuzu_Qt5_deps target_dir)
    set(DLL_DEST "${CMAKE_BINARY_DIR}/bin/$<CONFIG>/")
    set(Qt5_DLL_DIR "${Qt5_DIR}/../../../bin")
    set(Qt5_PLATFORMS_DIR "${Qt5_DIR}/../../../plugins/platforms/")
+    set(Qt5_STYLES_DIR "${Qt5_DIR}/../../../plugins/styles/")
    set(PLATFORMS ${DLL_DEST}platforms/)
+    set(STYLES ${DLL_DEST}styles/)
    windows_copy_files(${target_dir} ${Qt5_DLL_DIR} ${DLL_DEST}
        icudt*.dll
        icuin*.dll
@@ -14,4 +16,5 @@ function(copy_yuzu_Qt5_deps target_dir)
        Qt5Widgets$<$<CONFIG:Debug>:d>.*
    )
    windows_copy_files(yuzu ${Qt5_PLATFORMS_DIR} ${PLATFORMS} qwindows$<$<CONFIG:Debug>:d>.*)
+    windows_copy_files(yuzu ${Qt5_STYLES_DIR} ${STYLES} qwindowsvistastyle$<$<CONFIG:Debug>:d>.*)
 endfunction(copy_yuzu_Qt5_deps)
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -116,6 +116,7 @@ after_build:

          mkdir $RELEASE_DIST
          mkdir $RELEASE_DIST/platforms
+          mkdir $RELEASE_DIST/styles

          # copy the compiled binaries and other release files to the release folder
          Get-ChildItem "$CMAKE_BINARY_DIR" -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
@@ -136,6 +137,9 @@ after_build:
          # copy the qt windows plugin dll to platforms
          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/platforms/qwindows.dll" -force -destination "$RELEASE_DIST/platforms"

+          # copy the qt windows vista style dll to styles
+          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/styles/qwindowsvistastyle.dll" -force -destination "$RELEASE_DIST/styles"
+
          7z a -tzip $MINGW_BUILD_ZIP $RELEASE_DIST\*
          7z a $MINGW_SEVENZIP $RELEASE_DIST
        }
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -47,6 +47,7 @@ public:

        // Start the audio event
        CoreTiming::ScheduleEvent(audio_ticks, audio_event);
+        // Use resize() rather than reserve(): the update handler indexes into this vector and
+        // copies from its data(), so the elements must actually exist, not merely have capacity.
+        voice_status_list.resize(worker_params.voice_count);
    }
    ~IAudioRenderer() {
        CoreTiming::UnscheduleEvent(audio_event, 0);
@@ -68,6 +69,12 @@ private:
                    buf.data() + sizeof(UpdateDataHeader) + config.behavior_size,
                    memory_pool_count * sizeof(MemoryPoolInfo));

+        std::vector<VoiceInfo> voice_info(worker_params.voice_count);
+        std::memcpy(voice_info.data(),
+                    buf.data() + sizeof(UpdateDataHeader) + config.behavior_size +
+                        config.memory_pools_size + config.voice_resource_size,
+                    worker_params.voice_count * sizeof(VoiceInfo));
+
        UpdateDataHeader response_data{worker_params};

        ASSERT(ctx.GetWriteBufferSize() == response_data.total_size);
@@ -86,6 +93,23 @@ private:
        std::memcpy(output.data() + sizeof(UpdateDataHeader), memory_pool.data(),
                    response_data.memory_pools_size);

+        for (unsigned i = 0; i < voice_info.size(); i++) {
+            if (voice_info[i].is_new) {
+                voice_status_list[i].played_sample_count = 0;
+                voice_status_list[i].wave_buffer_consumed = 0;
+            } else if (voice_info[i].play_state == (u8)PlayStates::Started) {
+                for (u32 buff_idx = 0; buff_idx < voice_info[i].wave_buffer_count; buff_idx++) {
+                    voice_status_list[i].played_sample_count +=
+                        (voice_info[i].wave_buffer[buff_idx].end_sample_offset -
+                         voice_info[i].wave_buffer[buff_idx].start_sample_offset) /
+                        2;
+                    voice_status_list[i].wave_buffer_consumed++;
+                }
+            }
+        }
+        std::memcpy(output.data() + sizeof(UpdateDataHeader) + response_data.memory_pools_size,
+                    voice_status_list.data(), response_data.voices_size);
+
        ctx.WriteBuffer(output);

        IPC::ResponseBuilder rb{ctx, 2};
@@ -130,6 +154,11 @@ private:
        Released = 0x6,
    };

+    enum class PlayStates : u8 {
+        Started = 0,
+        Stopped = 1,
+    };
+
    struct MemoryPoolEntry {
        MemoryPoolStates state;
        u32_le unknown_4;
@@ -175,11 +204,69 @@ private:
    };
    static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size");

+    struct BiquadFilter {
+        u8 enable;
+        INSERT_PADDING_BYTES(1);
+        s16_le numerator[3];
+        s16_le denominator[2];
+    };
+    static_assert(sizeof(BiquadFilter) == 0xc, "BiquadFilter has wrong size");
+
+    struct WaveBuffer {
+        u64_le buffer_addr;
+        u64_le buffer_sz;
+        s32_le start_sample_offset;
+        s32_le end_sample_offset;
+        u8 loop;
+        u8 end_of_stream;
+        u8 sent_to_server;
+        INSERT_PADDING_BYTES(5);
+        u64 context_addr;
+        u64 context_sz;
+        INSERT_PADDING_BYTES(8);
+    };
+    static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer has wrong size");
+
+    struct VoiceInfo {
+        u32_le id;
+        u32_le node_id;
+        u8 is_new;
+        u8 is_in_use;
+        u8 play_state;
+        u8 sample_format;
+        u32_le sample_rate;
+        u32_le priority;
+        u32_le sorting_order;
+        u32_le channel_count;
+        float_le pitch;
+        float_le volume;
+        BiquadFilter biquad_filter[2];
+        u32_le wave_buffer_count;
+        u16_le wave_buffer_head;
+        INSERT_PADDING_BYTES(6);
+        u64_le additional_params_addr;
+        u64_le additional_params_sz;
+        u32_le mix_id;
+        u32_le splitter_info_id;
+        WaveBuffer wave_buffer[4];
+        u32_le voice_channel_resource_ids[6];
+        INSERT_PADDING_BYTES(24);
+    };
+    static_assert(sizeof(VoiceInfo) == 0x170, "VoiceInfo is wrong size");
+
+    struct VoiceOutStatus {
+        u64_le played_sample_count;
+        u32_le wave_buffer_consumed;
+        INSERT_PADDING_WORDS(1);
+    };
+    static_assert(sizeof(VoiceOutStatus) == 0x10, "VoiceOutStatus has wrong size");
+
    /// This is used to trigger the audio event callback.
    CoreTiming::EventType* audio_event;

    Kernel::SharedPtr<Kernel::Event> system_event;
    AudioRendererParameter worker_params;
+    std::vector<VoiceOutStatus> voice_status_list;
 };

 class IAudioDevice final : public ServiceFramework<IAudioDevice> {
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -126,6 +126,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
        DrawArrays();
        break;
    }
+    case MAXWELL3D_REG_INDEX(clear_buffers): {
+        ProcessClearBuffers();
+        break;
+    }
    case MAXWELL3D_REG_INDEX(query.query_get): {
        ProcessQueryGet();
        break;
@@ -415,5 +419,13 @@ bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const {
    UNREACHABLE();
 }

+/// Handles a write to the CLEAR_BUFFERS register by asking the rasterizer to perform a clear.
+void Maxwell3D::ProcessClearBuffers() {
+    // Only all-or-nothing color clears are expected here: the R, G, B and A enable bits must
+    // all hold the same value.
+    ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
+           regs.clear_buffers.R == regs.clear_buffers.B &&
+           regs.clear_buffers.R == regs.clear_buffers.A);
+
+    VideoCore::g_renderer->Rasterizer()->Clear();
+}
+
 } // namespace Engines
 } // namespace Tegra
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -281,14 +281,26 @@ public:
        };

        enum class ComparisonOp : u32 {
-            Never = 0,
-            Less = 1,
-            Equal = 2,
-            LessEqual = 3,
-            Greater = 4,
-            NotEqual = 5,
-            GreaterEqual = 6,
-            Always = 7,
+            // These values are used by Nouveau and most games, they correspond to the OpenGL token
+            // values for these operations.
+            Never = 0x200,
+            Less = 0x201,
+            Equal = 0x202,
+            LessEqual = 0x203,
+            Greater = 0x204,
+            NotEqual = 0x205,
+            GreaterEqual = 0x206,
+            Always = 0x207,
+
+            // These values are used by some games, they seem to be NV04 values.
+            NeverOld = 1,
+            LessOld = 2,
+            EqualOld = 3,
+            LessEqualOld = 4,
+            GreaterOld = 5,
+            NotEqualOld = 6,
+            GreaterEqualOld = 7,
+            AlwaysOld = 8,
        };

        struct Cull {
@@ -436,7 +448,12 @@ public:
                    u32 count;
                } vertex_buffer;

-                INSERT_PADDING_WORDS(0x99);
+                INSERT_PADDING_WORDS(1);
+
+                float clear_color[4];
+                float clear_depth;
+
+                INSERT_PADDING_WORDS(0x93);

                struct {
                    u32 address_high;
@@ -473,9 +490,11 @@ public:

                u32 depth_write_enabled;

-                INSERT_PADDING_WORDS(0x8);
+                INSERT_PADDING_WORDS(0x7);

-                BitField<0, 3, ComparisonOp> depth_test_func;
+                u32 d3d_cull_mode;
+
+                ComparisonOp depth_test_func;

                INSERT_PADDING_WORDS(0xB);

@@ -493,7 +512,13 @@ public:
                    u32 enable[NumRenderTargets];
                } blend;

-                INSERT_PADDING_WORDS(0x2D);
+                INSERT_PADDING_WORDS(0xB);
+
+                union {
+                    BitField<4, 1, u32> triangle_rast_flip;
+                } screen_y_control;
+
+                INSERT_PADDING_WORDS(0x21);

                u32 vb_element_base;

@@ -523,7 +548,12 @@ public:
                    }
                } tic;

-                INSERT_PADDING_WORDS(0x22);
+                INSERT_PADDING_WORDS(0x21);
+
+                union {
+                    BitField<2, 1, u32> coord_origin;
+                    BitField<3, 10, u32> enable;
+                } point_coord_replace;

                struct {
                    u32 code_address_high;
@@ -584,7 +614,21 @@ public:

                Cull cull;

-                INSERT_PADDING_WORDS(0x77);
+                INSERT_PADDING_WORDS(0x2B);
+
+                union {
+                    u32 raw;
+                    BitField<0, 1, u32> Z;
+                    BitField<1, 1, u32> S;
+                    BitField<2, 1, u32> R;
+                    BitField<3, 1, u32> G;
+                    BitField<4, 1, u32> B;
+                    BitField<5, 1, u32> A;
+                    BitField<6, 4, u32> RT;
+                    BitField<10, 11, u32> layer;
+                } clear_buffers;
+
+                INSERT_PADDING_WORDS(0x4B);

                struct {
                    u32 query_address_high;
@@ -766,6 +810,9 @@ private:
    /// Handles writes to the macro uploading registers.
    void ProcessMacroUpload(u32 data);

+    /// Handles a write to the CLEAR_BUFFERS register.
+    void ProcessClearBuffers();
+
    /// Handles a write to the QUERY_GET register.
    void ProcessQueryGet();

@@ -788,21 +835,27 @@ ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform[0], 0x280);
 ASSERT_REG_POSITION(viewport, 0x300);
 ASSERT_REG_POSITION(vertex_buffer, 0x35D);
+ASSERT_REG_POSITION(clear_color[0], 0x360);
+ASSERT_REG_POSITION(clear_depth, 0x364);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
 ASSERT_REG_POSITION(rt_control, 0x487);
 ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
 ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
 ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
+ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
 ASSERT_REG_POSITION(depth_test_func, 0x4C3);
 ASSERT_REG_POSITION(blend, 0x4CF);
+ASSERT_REG_POSITION(screen_y_control, 0x4EB);
 ASSERT_REG_POSITION(vb_element_base, 0x50D);
 ASSERT_REG_POSITION(tsc, 0x557);
 ASSERT_REG_POSITION(tic, 0x55D);
+ASSERT_REG_POSITION(point_coord_replace, 0x581);
 ASSERT_REG_POSITION(code_address, 0x582);
 ASSERT_REG_POSITION(draw, 0x585);
 ASSERT_REG_POSITION(index_array, 0x5F2);
 ASSERT_REG_POSITION(cull, 0x646);
+ASSERT_REG_POSITION(clear_buffers, 0x674);
 ASSERT_REG_POSITION(query, 0x6C0);
 ASSERT_REG_POSITION(vertex_array[0], 0x700);
 ASSERT_REG_POSITION(independent_blend, 0x780);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -328,6 +328,19 @@ union Instruction {
        BitField<49, 3, PredCondition> cond;
    } isetp;

+    union {
+        BitField<0, 3, u64> pred0;
+        BitField<3, 3, u64> pred3;
+        BitField<12, 3, u64> pred12;
+        BitField<15, 1, u64> neg_pred12;
+        BitField<24, 2, PredOperation> cond;
+        BitField<29, 3, u64> pred29;
+        BitField<32, 1, u64> neg_pred29;
+        BitField<39, 3, u64> pred39;
+        BitField<42, 1, u64> neg_pred39;
+        BitField<45, 2, PredOperation> op;
+    } psetp;
+
    union {
        BitField<39, 3, u64> pred39;
        BitField<42, 1, u64> neg_pred;
@@ -641,7 +654,7 @@ private:
            INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
            INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
            INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
-            INST("1100000000111---", Id::TEX, Type::Memory, "TEX"),
+            INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
            INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
            INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
            INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -19,6 +19,9 @@ public:
    /// Draw the current batch of vertex arrays
    virtual void DrawArrays() = 0;

+    /// Clear the current framebuffer
+    virtual void Clear() = 0;
+
    /// Notify rasterizer that the specified Maxwell register has been changed
    virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;

--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -297,11 +297,8 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
    return true;
 }

-void RasterizerOpenGL::DrawArrays() {
-    if (accelerate_draw == AccelDraw::Disabled)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_Drawing);
+std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb,
+                                                                    bool using_depth_fb) {
    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;

    // Sync the depth test state before configuring the framebuffer surfaces.
@@ -310,9 +307,6 @@ void RasterizerOpenGL::DrawArrays() {
    // TODO(bunnei): Implement this
    const bool has_stencil = false;

-    const bool using_color_fb = true;
-    const bool using_depth_fb = regs.zeta.Address() != 0;
-
    const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};

    const bool write_color_fb =
@@ -344,11 +338,6 @@ void RasterizerOpenGL::DrawArrays() {
    BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);

    SyncViewport(surfaces_rect);
-    SyncBlendState();
-    SyncCullMode();
-
-    // TODO(bunnei): Sync framebuffer_scale uniform here
-    // TODO(bunnei): Sync scissorbox uniform(s) here

    // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
    // scissor test to prevent drawing outside of the framebuffer region
@@ -359,6 +348,66 @@ void RasterizerOpenGL::DrawArrays() {
    state.scissor.height = draw_rect.GetHeight();
    state.Apply();

+    // Only return the surface to be marked as dirty if writing to it is enabled.
+    return std::make_pair(write_color_fb ? color_surface : nullptr,
+                          write_depth_fb ? depth_surface : nullptr);
+}
+
+/// Clears the color and/or depth buffer according to the Maxwell clear_buffers register, using
+/// the clear color and clear depth values held in the Maxwell registers.
+/// NOTE(review): the stencil (S) bit of clear_buffers is not read here.
+void RasterizerOpenGL::Clear() {
+    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
+
+    bool use_color_fb = false;
+    bool use_depth_fb = false;
+
+    // Build the GL clear mask. Color is only cleared when all four channel bits are set.
+    GLbitfield clear_mask = 0;
+    if (regs.clear_buffers.R && regs.clear_buffers.G && regs.clear_buffers.B &&
+        regs.clear_buffers.A) {
+        clear_mask |= GL_COLOR_BUFFER_BIT;
+        use_color_fb = true;
+    }
+    if (regs.clear_buffers.Z) {
+        clear_mask |= GL_DEPTH_BUFFER_BIT;
+        use_depth_fb = true;
+    }
+
+    // Nothing was selected for clearing, so skip the framebuffer setup entirely.
+    if (clear_mask == 0)
+        return;
+
+    // Configure the framebuffer state; the returned surfaces are non-null only when writing to
+    // them is enabled.
+    auto [dirty_color_surface, dirty_depth_surface] =
+        ConfigureFramebuffers(use_color_fb, use_depth_fb);
+
+    // TODO(Subv): Support clearing only partial colors.
+    glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2],
+                 regs.clear_color[3]);
+    glClearDepth(regs.clear_depth);
+
+    glClear(clear_mask);
+
+    // Mark framebuffer surfaces as dirty
+    if (dirty_color_surface != nullptr) {
+        res_cache.MarkSurfaceAsDirty(dirty_color_surface);
+    }
+    if (dirty_depth_surface != nullptr) {
+        res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
+    }
+}
+
+void RasterizerOpenGL::DrawArrays() {
+    if (accelerate_draw == AccelDraw::Disabled)
+        return;
+
+    MICROPROFILE_SCOPE(OpenGL_Drawing);
+    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
+
+    auto [dirty_color_surface, dirty_depth_surface] =
+        ConfigureFramebuffers(true, regs.zeta.Address() != 0);
+
+    SyncBlendState();
+    SyncCullMode();
+
+    // TODO(bunnei): Sync framebuffer_scale uniform here
+    // TODO(bunnei): Sync scissorbox uniform(s) here
+
    // Draw the vertex batch
    const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
    const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
@@ -439,11 +488,11 @@ void RasterizerOpenGL::DrawArrays() {
    state.Apply();

    // Mark framebuffer surfaces as dirty
-    if (color_surface != nullptr && write_color_fb) {
-        res_cache.MarkSurfaceAsDirty(color_surface);
+    if (dirty_color_surface != nullptr) {
+        res_cache.MarkSurfaceAsDirty(dirty_color_surface);
    }
-    if (depth_surface != nullptr && write_depth_fb) {
-        res_cache.MarkSurfaceAsDirty(depth_surface);
+    if (dirty_depth_surface != nullptr) {
+        res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
    }
 }

@@ -637,7 +686,10 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,

        // Bind the uniform to the sampler.
        GLint uniform = glGetUniformLocation(program, entry.GetName().c_str());
-        ASSERT(uniform != -1);
+        if (uniform == -1) {
+            continue;
+        }
+
        glProgramUniform1i(program, uniform, current_bindpoint);

        const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
@@ -722,6 +774,16 @@ void RasterizerOpenGL::SyncCullMode() {
    if (state.cull.enabled) {
        state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
        state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
+
+        // If the GPU is configured to flip the rasterized triangles, then we need to flip the
+        // notion of front and back. Note: We flip the triangles when the value of the register is 0
+        // because OpenGL already does it for us.
+        if (regs.screen_y_control.triangle_rast_flip == 0) {
+            if (state.cull.front_face == GL_CCW)
+                state.cull.front_face = GL_CW;
+            else if (state.cull.front_face == GL_CW)
+                state.cull.front_face = GL_CCW;
+        }
    }
 }

--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -7,6 +7,7 @@
 #include <array>
 #include <cstddef>
 #include <memory>
+#include <utility>
 #include <vector>
 #include <glad/glad.h>
 #include "common/common_types.h"
@@ -28,6 +29,7 @@ public:
    ~RasterizerOpenGL() override;

    void DrawArrays() override;
+    void Clear() override;
    void NotifyMaxwellRegisterChanged(u32 method) override;
    void FlushAll() override;
    void FlushRegion(Tegra::GPUVAddr addr, u64 size) override;
@@ -81,6 +83,10 @@ private:
        u32 border_color_a;
    };

+    /// Configures the color and depth framebuffer states and returns the dirty <Color, Depth>
+    /// surfaces if writing was enabled.
+    std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb);
+
    /// Binds the framebuffer color and depth surface
    void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
                                 bool has_stencil);
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -65,6 +65,25 @@ struct FormatTuple {
    return params;
 }

+/// Builds the SurfaceParams describing a depth buffer. The width and height are taken from the
+/// supplied render target config; the address and depth format are supplied by the caller.
+/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
+    const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, Tegra::GPUVAddr zeta_address,
+    Tegra::DepthFormat format) {
+
+    SurfaceParams params{};
+    params.addr = zeta_address;
+    params.is_tiled = true;
+    params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
+    params.pixel_format = PixelFormatFromDepthFormat(format);
+    params.component_type = ComponentTypeFromDepthFormat(format);
+    params.type = GetFormatType(params.pixel_format);
+    params.width = config.width;
+    params.height = config.height;
+    params.unaligned_height = config.height;
+    // Compute the size exactly once, after the dimensions and format have been filled in.
+    params.size_in_bytes = params.SizeInBytes();
+    return params;
+}
+
 static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8
    {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false},    // B5G6R5
@@ -88,6 +107,9 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
    // DepthStencil formats
    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
     false}, // Z24S8
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
+     false},                                                                            // S8Z24
+    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
 }};

 static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
@@ -131,13 +153,6 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect() const {
    return {0, actual_height, width, 0};
 }

-static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) {
-    u32 block_width{};
-    u32 block_height{};
-    std::tie(block_width, block_height) = GetASTCBlockSize(format);
-    data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
-}
-
 template <bool morton_to_gl, PixelFormat format>
 void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) {
    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
@@ -177,6 +192,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
        MortonCopy<true, PixelFormat::DXT1>,         MortonCopy<true, PixelFormat::DXT23>,
        MortonCopy<true, PixelFormat::DXT45>,        MortonCopy<true, PixelFormat::DXN1>,
        MortonCopy<true, PixelFormat::ASTC_2D_4X4>,  MortonCopy<true, PixelFormat::Z24S8>,
+        MortonCopy<true, PixelFormat::S8Z24>,        MortonCopy<true, PixelFormat::Z32F>,
 };

 static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
@@ -197,6 +213,8 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
        nullptr,
        MortonCopy<false, PixelFormat::ABGR8>,
        MortonCopy<false, PixelFormat::Z24S8>,
+        MortonCopy<false, PixelFormat::S8Z24>,
+        MortonCopy<false, PixelFormat::Z32F>,
 };

 // Allocate an uninitialized texture of appropriate size and format for the surface
@@ -234,6 +252,71 @@ CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
                           rect.GetWidth(), rect.GetHeight());
 }

+/**
+ * Converts a buffer of 32-bit S8Z24 pixels to the Z24S8 layout, in place.
+ * @param data   Byte buffer holding width * height 4-byte pixels; rewritten in place.
+ * @param width  Surface width in pixels.
+ * @param height Surface height in pixels.
+ */
+static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
+    // Source layout: depth in bits 0-23, stencil in bits 24-31.
+    union S8Z24 {
+        BitField<0, 24, u32> z24;
+        BitField<24, 8, u32> s8;
+    };
+    static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
+
+    // Destination layout: stencil in bits 0-7, depth in bits 8-31.
+    union Z24S8 {
+        BitField<0, 8, u32> s8;
+        BitField<8, 24, u32> z24;
+    };
+    static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
+
+    S8Z24 input_pixel{};
+    Z24S8 output_pixel{};
+    for (size_t y = 0; y < height; ++y) {
+        for (size_t x = 0; x < width; ++x) {
+            // data is a byte buffer, so the pixel index must be scaled by the pixel size to
+            // obtain the byte offset of pixel (x, y).
+            const size_t offset{sizeof(S8Z24) * (y * width + x)};
+            std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
+            output_pixel.s8.Assign(input_pixel.s8);
+            output_pixel.z24.Assign(input_pixel.z24);
+            std::memcpy(&data[offset], &output_pixel, sizeof(Z24S8));
+        }
+    }
+}
+/**
+ * Helper function to perform software conversion (as needed) when loading a buffer from Switch
+ * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
+ * typical desktop GPUs. Formats not listed in the switch are left untouched.
+ * @param data         Pixel data to convert; replaced or rewritten in place when needed.
+ * @param pixel_format Format of the pixels currently stored in data.
+ * @param width        Surface width in pixels.
+ * @param height       Surface height in pixels.
+ */
+static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
+                                               u32 width, u32 height) {
+    switch (pixel_format) {
+    case PixelFormat::ASTC_2D_4X4: {
+        // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
+        u32 block_width{};
+        u32 block_height{};
+        std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
+        data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
+        break;
+    }
+    case PixelFormat::S8Z24:
+        // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
+        ConvertS8Z24ToZ24S8(data, width, height);
+        break;
+    default:
+        // All remaining formats are used as-is.
+        break;
+    }
+}
+
+/**
+ * Helper function to perform software conversion (as needed) when flushing a buffer to Switch
+ * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
+ * typical desktop GPUs. No conversion is implemented yet: the formats that would need one are
+ * reported as unimplemented, and every other format is left untouched.
+ */
+static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& /*data*/, PixelFormat pixel_format,
+                                                u32 /*width*/, u32 /*height*/) {
+    switch (pixel_format) {
+    case PixelFormat::ASTC_2D_4X4:
+    case PixelFormat::S8Z24:
+        LOG_CRITICAL(Render_OpenGL, "Unimplemented pixel_format={}",
+                     static_cast<u32>(pixel_format));
+        UNREACHABLE();
+        break;
+    default:
+        // All remaining formats are flushed as-is.
+        break;
+    }
+}
+
 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
 void CachedSurface::LoadGLBuffer() {
    ASSERT(params.type != SurfaceType::Fill);
@@ -256,10 +339,7 @@ void CachedSurface::LoadGLBuffer() {
            params.width, params.block_height, params.height, gl_buffer.data(), params.addr);
    }

-    if (IsPixelFormatASTC(params.pixel_format)) {
-        // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this
-        ConvertASTCToRGBA8(gl_buffer, params.pixel_format, params.width, params.height);
-    }
+    ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height);
 }

 MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
@@ -272,6 +352,9 @@ void CachedSurface::FlushGLBuffer() {

    MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);

+    ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width,
+                                        params.height);
+
    if (!params.is_tiled) {
        std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes);
    } else {
@@ -399,15 +482,16 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
    LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!");

    // get color and depth surfaces
-    const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(regs.rt[0])};
-    SurfaceParams depth_params{color_params};
+    SurfaceParams color_params{};
+    SurfaceParams depth_params{};
+
+    if (using_color_fb) {
+        color_params = SurfaceParams::CreateForFramebuffer(regs.rt[0]);
+    }

    if (using_depth_fb) {
-        depth_params.addr = regs.zeta.Address();
-        depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(regs.zeta.format);
-        depth_params.component_type = SurfaceParams::ComponentTypeFromDepthFormat(regs.zeta.format);
-        depth_params.type = SurfaceParams::GetFormatType(depth_params.pixel_format);
-        depth_params.size_in_bytes = depth_params.SizeInBytes();
+        depth_params =
+            SurfaceParams::CreateForDepthBuffer(regs.rt[0], regs.zeta.Address(), regs.zeta.format);
    }

    MathUtil::Rectangle<u32> color_rect{};
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -41,6 +41,8 @@ struct SurfaceParams {

        // DepthStencil formats
        Z24S8 = 13,
+        S8Z24 = 14,
+        Z32F = 15,

        MaxDepthStencilFormat,

@@ -92,6 +94,8 @@ struct SurfaceParams {
            4, // DXN1
            4, // ASTC_2D_4X4
            1, // Z24S8
+            1, // S8Z24
+            1, // Z32F
        }};

        ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -117,6 +121,8 @@ struct SurfaceParams {
            64,  // DXN1
            32,  // ASTC_2D_4X4
            32,  // Z24S8
+            32,  // S8Z24
+            32,  // Z32F
        }};

        ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -128,8 +134,12 @@ struct SurfaceParams {

    static PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
        switch (format) {
+        case Tegra::DepthFormat::S8_Z24_UNORM:
+            return PixelFormat::S8Z24;
        case Tegra::DepthFormat::Z24_S8_UNORM:
            return PixelFormat::Z24S8;
+        case Tegra::DepthFormat::Z32_FLOAT:
+            return PixelFormat::Z32F;
        default:
            LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
            UNREACHABLE();
@@ -226,8 +236,12 @@ struct SurfaceParams {

    static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) {
        switch (format) {
+        case PixelFormat::S8Z24:
+            return Tegra::DepthFormat::S8_Z24_UNORM;
        case PixelFormat::Z24S8:
            return Tegra::DepthFormat::Z24_S8_UNORM;
+        case PixelFormat::Z32F:
+            return Tegra::DepthFormat::Z32_FLOAT;
        default:
            UNREACHABLE();
        }
@@ -274,8 +288,11 @@ struct SurfaceParams {

    static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
        switch (format) {
+        case Tegra::DepthFormat::S8_Z24_UNORM:
        case Tegra::DepthFormat::Z24_S8_UNORM:
            return ComponentType::UNorm;
+        case Tegra::DepthFormat::Z32_FLOAT:
+            return ComponentType::Float;
        default:
            LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
            UNREACHABLE();
@@ -318,13 +335,18 @@ struct SurfaceParams {
        return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes);
    }

-    /// Creates SurfaceParams from a texture configation
+    /// Creates SurfaceParams from a texture configuration
    static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);

-    /// Creates SurfaceParams from a framebuffer configation
+    /// Creates SurfaceParams from a framebuffer configuration
    static SurfaceParams CreateForFramebuffer(
        const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);

+    /// Creates SurfaceParams for a depth buffer configuration
+    static SurfaceParams CreateForDepthBuffer(
+        const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config,
+        Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format);
+
    Tegra::GPUVAddr addr;
    bool is_tiled;
    u32 block_height;
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1213,6 +1213,9 @@ private:
                switch (instr.conversion.f2f.rounding) {
                case Tegra::Shader::F2fRoundingOp::None:
                    break;
+                case Tegra::Shader::F2fRoundingOp::Round:
+                    op_a = "roundEven(" + op_a + ')';
+                    break;
                case Tegra::Shader::F2fRoundingOp::Floor:
                    op_a = "floor(" + op_a + ')';
                    break;
@@ -1477,6 +1480,36 @@ private:
            }
            break;
        }
+        case OpCode::Type::PredicateSetPredicate: {
+            std::string op_a =
+                GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
+            std::string op_b =
+                GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
+
+            using Tegra::Shader::Pred;
+            // We can't use the constant predicate as destination.
+            ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+            std::string second_pred =
+                GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
+
+            std::string combiner = GetPredicateCombiner(instr.psetp.op);
+
+            std::string predicate =
+                '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
+
+            // Set the primary predicate to the result of Predicate OP SecondPredicate
+            SetPredicate(instr.psetp.pred3,
+                         '(' + predicate + ") " + combiner + " (" + second_pred + ')');
+
+            if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+                // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+                // if enabled
+                SetPredicate(instr.psetp.pred0,
+                             "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+            }
+            break;
+        }
        case OpCode::Type::FloatSet: {
            std::string op_a = instr.fset.neg_a ? "-" : "";
            op_a += regs.GetRegisterAsFloat(instr.gpr8);
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -29,6 +29,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
        switch (attrib.size) {
        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
            return GL_UNSIGNED_BYTE;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return GL_UNSIGNED_SHORT;
+        case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+            return GL_UNSIGNED_INT_2_10_10_10_REV;
        }

        LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
@@ -41,6 +45,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
        switch (attrib.size) {
        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
            return GL_BYTE;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return GL_SHORT;
+        case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+            return GL_INT_2_10_10_10_REV;
        }

        LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
@@ -203,20 +211,28 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
 inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
    switch (comparison) {
    case Maxwell::ComparisonOp::Never:
+    case Maxwell::ComparisonOp::NeverOld:
        return GL_NEVER;
    case Maxwell::ComparisonOp::Less:
+    case Maxwell::ComparisonOp::LessOld:
        return GL_LESS;
    case Maxwell::ComparisonOp::Equal:
+    case Maxwell::ComparisonOp::EqualOld:
        return GL_EQUAL;
    case Maxwell::ComparisonOp::LessEqual:
+    case Maxwell::ComparisonOp::LessEqualOld:
        return GL_LEQUAL;
    case Maxwell::ComparisonOp::Greater:
+    case Maxwell::ComparisonOp::GreaterOld:
        return GL_GREATER;
    case Maxwell::ComparisonOp::NotEqual:
+    case Maxwell::ComparisonOp::NotEqualOld:
        return GL_NOTEQUAL;
    case Maxwell::ComparisonOp::GreaterEqual:
+    case Maxwell::ComparisonOp::GreaterEqualOld:
        return GL_GEQUAL;
    case Maxwell::ComparisonOp::Always:
+    case Maxwell::ComparisonOp::AlwaysOld:
        return GL_ALWAYS;
    }
    LOG_CRITICAL(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison));
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -76,7 +76,9 @@ u32 BytesPerPixel(TextureFormat format) {

 static u32 DepthBytesPerPixel(DepthFormat format) {
    switch (format) {
+    case DepthFormat::S8_Z24_UNORM:
    case DepthFormat::Z24_S8_UNORM:
+    case DepthFormat::Z32_FLOAT:
        return 4;
    default:
        UNIMPLEMENTED_MSG("Format not implemented");
@@ -129,7 +131,9 @@ std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid
    std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);

    switch (format) {
+    case DepthFormat::S8_Z24_UNORM:
    case DepthFormat::Z24_S8_UNORM:
+    case DepthFormat::Z32_FLOAT:
        CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
                         unswizzled_data.data(), true, block_height);
        break;
Author	SHA1	Message	Date
Subv	9f6a5660e8	GPU: Allow using the old NV04 values for the depth test function. These seem to be just as valid as the GL token values. Thanks @ReinUsesLisp This restores graphical output to Disgaea 5	2018-07-05 13:01:31 -05:00
bunnei	637f9d780a	Merge pull request #624 from Subv/f2f_round GPU: Implemented the F2F 'round' rounding mode.	2018-07-05 11:30:29 -04:00
bunnei	956b5db52e	Merge pull request #623 from Subv/vertex_types GPU: Implement the Size_16_16 and Size_10_10_10_2 vertex attribute types	2018-07-05 11:30:01 -04:00
bunnei	8b815877a6	Merge pull request #622 from Subv/unused_tex GPU: Ignore unused textures and corrected the TEX shader instruction decoding.	2018-07-05 11:29:17 -04:00
bunnei	1b0a74e23f	Merge pull request #621 from Subv/psetp_ GPU: Implemented the PSETP shader instruction.	2018-07-05 11:28:50 -04:00
bunnei	9a3c0b161e	Merge pull request #620 from Subv/depth_z32f GPU: Implemented the 32 bit float depth buffer format.	2018-07-05 11:09:15 -04:00
Subv	d800a02b4b	GPU: Implemented the F2F 'round' rounding mode. It's implemented via the GLSL 'roundEven()' function.	2018-07-04 15:43:21 -05:00
Subv	ce39ae3e57	GPU: Implement the Size_16_16 and Size_10_10_10_2 vertex attribute types. Both signed and unsigned variants.	2018-07-04 15:22:34 -05:00
Subv	4bda9693be	GPU: Ignore textures that the GLSL compiler deemed unused when binding textures to the shaders.	2018-07-04 15:20:12 -05:00
Subv	c42b818cf9	GPU: Corrected the decoding for the TEX shader instruction.	2018-07-04 15:19:20 -05:00
Subv	53a55bd751	GPU: Implemented the PSETP shader instruction. It's similar to the isetp and fsetp instructions but it works on predicates instead.	2018-07-04 15:15:03 -05:00
bunnei	2355460d7c	Merge pull request #619 from Subv/flip_cull GPU: Flip the triangle front face winding if the GPU is configured to not flip the triangles.	2018-07-04 12:13:38 -04:00
Subv	016e357c75	GPU: Implemented the 32 bit float depth buffer format.	2018-07-04 10:42:33 -05:00
Subv	c1bebdef5e	GPU: Flip the triangle front face winding if the GPU is configured to not flip the triangles. OpenGL's default behavior is already correct when the GPU is configured to flip the triangles. This fixes 1-2 Switch's splash screen.	2018-07-04 10:26:46 -05:00
bunnei	81a44d38ee	Merge pull request #618 from Subv/clear_used_buffers GPU: Only configure the used framebuffers during clear.	2018-07-04 00:12:46 -04:00
Subv	5a9df3c675	GPU: Only configure the used framebuffers during clear. Don't try to configure the color buffer if it is not being cleared, it may not be completely valid at this point.	2018-07-03 22:32:59 -05:00
bunnei	c996787d84	Merge pull request #609 from Subv/clear_buffers GPU: Implemented the CLEAR_BUFFERS register.	2018-07-03 19:34:34 -04:00
bunnei	4030f600dc	Merge pull request #616 from bunnei/s8z24 gl_rasterizer_cache: Implement PixelFormat S8Z24.	2018-07-03 18:26:31 -04:00
Subv	78443a7f29	GPU: Factor out the framebuffer configuration code for both Clear and Draw commands.	2018-07-03 16:56:47 -05:00
Subv	c1811ed3d1	GPU: Support clears that don't clear the color buffer.	2018-07-03 16:56:47 -05:00
Subv	be51120d23	GPU: Bind and clear the render target when the CLEAR_BUFFERS register is written to.	2018-07-03 16:56:44 -05:00
Subv	827bb08c91	GPU: Added registers for the CLEAR_BUFFERS and CLEAR_COLOR methods.	2018-07-03 16:56:31 -05:00
bunnei	c164f02c48	Merge pull request #613 from jroweboy/qt-style Add qt windowsvistastyle dll to the build	2018-07-03 17:48:29 -04:00
bunnei	9da1552417	gl_rasterizer_cache: Implement PixelFormat S8Z24.	2018-07-03 14:58:13 -04:00
James Rowe	6ff20dc6a7	Add qt windowsvistastyle dll to the build	2018-07-03 14:44:13 -04:00
David	3dab0e284b	Update AudioRenderer Voice Sections (#614 ) * voice section updating * fixed slight offset miscalculation * fixed overflow	2018-07-03 13:09:10 -04:00