gl_device: Enable compute shaders for Intel Mesa drivers

Previously we naively checked for "Intel" in GL_VENDOR, but this includes both Intel's proprietary driver and the mesa driver. Re-enable compute shaders for mesa.
Merge pull request #3201 from lioncash/dump
2019-12-11 00:00:30 -03:00 · 2019-12-10 21:48:37 -05:00 · 2019-12-10 21:20:52 -03:00 · 2019-12-10 19:51:46 -04:00 · 2019-12-10 08:01:41 -04:00 · 2019-12-10 01:55:38 -03:00
45 changed files with 2326 additions and 920 deletions
--- a/.ci/scripts/windows/upload.ps1
+++ b/.ci/scripts/windows/upload.ps1
@@ -39,6 +39,7 @@ mkdir "artifacts"
 # Build a tar.xz for the source of the release
 Copy-Item .\license.txt -Destination $MSVC_SOURCE
 Copy-Item .\README.md -Destination $MSVC_SOURCE
+Copy-Item .\CMakeLists.txt -Destination $MSVC_SOURCE
 Copy-Item .\src -Recurse -Destination $MSVC_SOURCE
 Copy-Item .\externals -Recurse -Destination $MSVC_SOURCE
 Copy-Item .\dist -Recurse -Destination $MSVC_SOURCE
@@ -60,4 +61,4 @@ Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Ite

 Get-ChildItem . -Filter "*.zip" | Copy-Item -destination "artifacts"
 Get-ChildItem . -Filter "*.7z" | Copy-Item -destination "artifacts"
-Get-ChildItem . -Filter "*.tar.xz" | Copy-Item -destination "artifacts"
+Get-ChildItem . -Filter "*.tar.xz" | Copy-Item -destination "artifacts"
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,7 +29,7 @@ option(ENABLE_VULKAN "Enables Vulkan backend" ON)

 option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)

-if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
+if(EXISTS ${PROJECT_SOURCE_DIR}/hooks/pre-commit AND NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
    message(STATUS "Copying pre-commit hook")
    file(COPY hooks/pre-commit
        DESTINATION ${PROJECT_SOURCE_DIR}/.git/hooks)
@@ -49,7 +49,10 @@ function(check_submodules_present)
        endif()
    endforeach()
 endfunction()
-check_submodules_present()
+
+if(EXISTS ${PROJECT_SOURCE_DIR}/.gitmodules)
+    check_submodules_present()
+endif()

 configure_file(${PROJECT_SOURCE_DIR}/dist/compatibility_list/compatibility_list.qrc
               ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc
--- a/externals/Vulkan-Headers
+++ b/externals/Vulkan-Headers
--- a/externals/sirit
+++ b/externals/sirit
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -96,6 +96,8 @@ void Cpu::RunLoop(bool tight_loop) {
        } else {
            arm_interface->Step();
        }
+        // We are stopping a run, exclusive state must be cleared
+        arm_interface->ClearExclusiveState();
    }
    core_timing.Advance();

--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -11,8 +11,6 @@
 #include "core/core_cpu.h"
 #include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/object.h"
-#include "core/hle/kernel/process.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/result.h"
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,10 +4,10 @@

 #pragma once

+#include <memory>
 #include <vector>

 #include "common/common_types.h"
-#include "core/hle/kernel/object.h"

 union ResultCode;

--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -8,7 +8,6 @@
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/server_port.h"
-#include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/session.h"

 namespace Kernel {
--- a/src/core/hle/kernel/client_port.h
+++ b/src/core/hle/kernel/client_port.h
@@ -4,7 +4,9 @@

 #pragma once

+#include <memory>
 #include <string>
+
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/result.h"
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -6,6 +6,8 @@

 #include <array>
 #include <cstddef>
+#include <memory>
+
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/result.h"
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -13,7 +13,6 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
-#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -4,6 +4,7 @@

 #pragma once

+#include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <memory>
 #include <utility>
 #include <vector>

--- a/src/core/hle/kernel/resource_limit.h
+++ b/src/core/hle/kernel/resource_limit.h
@@ -5,6 +5,8 @@
 #pragma once

 #include <array>
+#include <memory>
+
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"

--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -458,7 +458,6 @@ void Scheduler::SwitchContext() {
        cpu_core.LoadContext(new_thread->GetContext());
        cpu_core.SetTlsAddress(new_thread->GetTLSAddress());
        cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
-        cpu_core.ClearExclusiveState();
    } else {
        current_thread = nullptr;
        // Note: We do not reset the current process and current page table when idling because
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -4,11 +4,12 @@

 #pragma once

-#include <mutex>
+#include <atomic>
+#include <memory>
 #include <vector>
+
 #include "common/common_types.h"
 #include "common/multi_level_queue.h"
-#include "core/hle/kernel/object.h"
 #include "core/hle/kernel/thread.h"

 namespace Core {
--- a/src/core/hle/kernel/session.h
+++ b/src/core/hle/kernel/session.h
@@ -6,9 +6,9 @@

 #include <memory>
 #include <string>
+#include <utility>

 #include "core/hle/kernel/wait_object.h"
-#include "core/hle/result.h"

 namespace Kernel {

--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -6,7 +6,6 @@

 #include <memory>
 #include <string>
-#include <vector>

 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1781,6 +1781,17 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type,
    return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
 }

+static void KernelDebug([[maybe_unused]] Core::System& system,
+                        [[maybe_unused]] u32 kernel_debug_type, [[maybe_unused]] u64 param1,
+                        [[maybe_unused]] u64 param2, [[maybe_unused]] u64 param3) {
+    // Intentionally do nothing, as this does nothing in released kernel binaries.
+}
+
+static void ChangeKernelTraceState([[maybe_unused]] Core::System& system,
+                                   [[maybe_unused]] u32 trace_state) {
+    // Intentionally do nothing, as this does nothing in released kernel binaries.
+}
+
 /// This returns the total CPU ticks elapsed since the CPU was powered-on
 static u64 GetSystemTick(Core::System& system) {
    LOG_TRACE(Kernel_SVC, "called");
@@ -2418,8 +2429,8 @@ static const FunctionDef SVC_Table[] = {
    {0x39, nullptr, "Unknown"},
    {0x3A, nullptr, "Unknown"},
    {0x3B, nullptr, "Unknown"},
-    {0x3C, nullptr, "DumpInfo"},
-    {0x3D, nullptr, "DumpInfoNew"},
+    {0x3C, SvcWrap<KernelDebug>, "KernelDebug"},
+    {0x3D, SvcWrap<ChangeKernelTraceState>, "ChangeKernelTraceState"},
    {0x3E, nullptr, "Unknown"},
    {0x3F, nullptr, "Unknown"},
    {0x40, nullptr, "CreateSession"},
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -311,6 +311,17 @@ void SvcWrap(Core::System& system) {
    func(system);
 }

+template <void func(Core::System&, u32)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<u32>(Param(system, 0)));
+}
+
+template <void func(Core::System&, u32, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2),
+         Param(system, 3));
+}
+
 template <void func(Core::System&, s64)>
 void SvcWrap(Core::System& system) {
    func(system, static_cast<s64>(Param(system, 0)));
--- a/src/core/hle/kernel/transfer_memory.h
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -5,7 +5,6 @@
 #pragma once

 #include <memory>
-#include <vector>

 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/physical_memory.h"
--- a/src/core/hle/kernel/wait_object.h
+++ b/src/core/hle/kernel/wait_object.h
@@ -4,8 +4,9 @@

 #pragma once

+#include <memory>
 #include <vector>
-#include <boost/smart_ptr/intrusive_ptr.hpp>
+
 #include "core/hle/kernel/object.h"

 namespace Kernel {
--- a/src/core/hle/kernel/writable_event.h
+++ b/src/core/hle/kernel/writable_event.h
@@ -4,6 +4,8 @@

 #pragma once

+#include <memory>
+
 #include "core/hle/kernel/object.h"

 namespace Kernel {
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -310,6 +310,11 @@ public:
            }
        };

+        enum class DepthMode : u32 {
+            MinusOneToOne = 0,
+            ZeroToOne = 1,
+        };
+
        enum class PrimitiveTopology : u32 {
            Points = 0x0,
            Lines = 0x1,
@@ -491,6 +496,18 @@ public:
            INSERT_UNION_PADDING_WORDS(1);
        };

+        enum class TessellationPrimitive : u32 {
+            Isolines = 0,
+            Triangles = 1,
+            Quads = 2,
+        };
+
+        enum class TessellationSpacing : u32 {
+            Equal = 0,
+            FractionalOdd = 1,
+            FractionalEven = 2,
+        };
+
        struct RenderTargetConfig {
            u32 address_high;
            u32 address_low;
@@ -628,7 +645,19 @@ public:
                    };
                } sync_info;

-                INSERT_UNION_PADDING_WORDS(0x11E);
+                INSERT_UNION_PADDING_WORDS(0x15);
+
+                union {
+                    BitField<0, 2, TessellationPrimitive> prim;
+                    BitField<4, 2, TessellationSpacing> spacing;
+                    BitField<8, 1, u32> cw;
+                    BitField<9, 1, u32> connected;
+                } tess_mode;
+
+                std::array<f32, 4> tess_level_outer;
+                std::array<f32, 2> tess_level_inner;
+
+                INSERT_UNION_PADDING_WORDS(0x102);

                u32 tfb_enabled;

@@ -647,7 +676,7 @@ public:
                    u32 count;
                } vertex_buffer;

-                INSERT_UNION_PADDING_WORDS(1);
+                DepthMode depth_mode;

                float clear_color[4];
                float clear_depth;
@@ -662,7 +691,9 @@ public:
                u32 polygon_offset_line_enable;
                u32 polygon_offset_fill_enable;

-                INSERT_UNION_PADDING_WORDS(0xD);
+                u32 patch_vertices;
+
+                INSERT_UNION_PADDING_WORDS(0xC);

                std::array<ScissorTest, NumViewports> scissor_test;

@@ -1386,17 +1417,22 @@ ASSERT_REG_POSITION(upload, 0x60);
 ASSERT_REG_POSITION(exec_upload, 0x6C);
 ASSERT_REG_POSITION(data_upload, 0x6D);
 ASSERT_REG_POSITION(sync_info, 0xB2);
+ASSERT_REG_POSITION(tess_mode, 0xC8);
+ASSERT_REG_POSITION(tess_level_outer, 0xC9);
+ASSERT_REG_POSITION(tess_level_inner, 0xCD);
 ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform, 0x280);
 ASSERT_REG_POSITION(viewports, 0x300);
 ASSERT_REG_POSITION(vertex_buffer, 0x35D);
+ASSERT_REG_POSITION(depth_mode, 0x35F);
 ASSERT_REG_POSITION(clear_color[0], 0x360);
 ASSERT_REG_POSITION(clear_depth, 0x364);
 ASSERT_REG_POSITION(clear_stencil, 0x368);
 ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
 ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
 ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
+ASSERT_REG_POSITION(patch_vertices, 0x373);
 ASSERT_REG_POSITION(scissor_test, 0x380);
 ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
 ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -98,10 +98,11 @@ union Attribute {
        BitField<20, 10, u64> immediate;
        BitField<22, 2, u64> element;
        BitField<24, 6, Index> index;
+        BitField<31, 1, u64> patch;
        BitField<47, 3, AttributeSize> size;

        bool IsPhysical() const {
-            return element == 0 && static_cast<u64>(index.Value()) == 0;
+            return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
        }
    } fmt20;

@@ -799,6 +800,12 @@ union Instruction {
        BitField<40, 1, u64> invert;
    } popc;

+    union {
+        BitField<41, 1, u64> sh;
+        BitField<40, 1, u64> invert;
+        BitField<48, 1, u64> is_signed;
+    } flo;
+
    union {
        BitField<39, 3, u64> pred;
        BitField<42, 1, u64> neg_pred;
@@ -1439,6 +1446,26 @@ union Instruction {
        }
    } tlds;

+    union {
+        BitField<28, 1, u64> is_array;
+        BitField<29, 2, TextureType> texture_type;
+        BitField<35, 1, u64> aoffi_flag;
+        BitField<49, 1, u64> nodep_flag;
+
+        bool UsesMiscMode(TextureMiscMode mode) const {
+            switch (mode) {
+            case TextureMiscMode::AOFFI:
+                return aoffi_flag != 0;
+            case TextureMiscMode::NODEP:
+                return nodep_flag != 0;
+            default:
+                break;
+            }
+            return false;
+        }
+
+    } txd;
+
    union {
        BitField<24, 2, StoreCacheManagement> cache_management;
        BitField<33, 3, ImageType> image_type;
@@ -1632,6 +1659,8 @@ public:
        TLD4S,  // Texture Load 4 with scalar / non - vec4 source / destinations
        TMML_B, // Texture Mip Map Level
        TMML,   // Texture Mip Map Level
+        TXD,    // Texture Gradient/Load with Derivates
+        TXD_B,  // Texture Gradient/Load with Derivates Bindless
        SUST,   // Surface Store
        SULD,   // Surface Load
        SUATOM, // Surface Atomic Operation
@@ -1664,6 +1693,9 @@ public:
        ISCADD_C, // Scale and Add
        ISCADD_R,
        ISCADD_IMM,
+        FLO_R,
+        FLO_C,
+        FLO_IMM,
        LEA_R1,
        LEA_R2,
        LEA_RZ,
@@ -1727,6 +1759,10 @@ public:
        SHR_C,
        SHR_R,
        SHR_IMM,
+        SHF_RIGHT_R,
+        SHF_RIGHT_IMM,
+        SHF_LEFT_R,
+        SHF_LEFT_IMM,
        FMNMX_C,
        FMNMX_R,
        FMNMX_IMM,
@@ -1924,6 +1960,8 @@ private:
            INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
            INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
            INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
+            INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
+            INST("11011110001110--", Id::TXD, Type::Texture, "TXD"),
            INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
            INST("11101011000-----", Id::SULD, Type::Image, "SULD"),
            INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
@@ -1965,6 +2003,9 @@ private:
            INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"),
            INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"),
            INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"),
+            INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"),
+            INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"),
+            INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"),
            INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
            INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
            INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
@@ -2022,6 +2063,10 @@ private:
            INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
            INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
            INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
+            INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"),
+            INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"),
+            INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"),
+            INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
            INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
            INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
            INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <array>
 #include <cstddef>
+#include <cstring>
 #include <optional>
 #include <vector>

@@ -134,11 +135,13 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin

 Device::Device() : base_bindings{BuildBaseBindings()} {
    const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
+    const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
    const std::vector extensions = GetExtensions();

    const bool is_nvidia = vendor == "NVIDIA Corporation";
    const bool is_amd = vendor == "ATI Technologies Inc.";
    const bool is_intel = vendor == "Intel";
+    const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;

    uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
    shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -152,7 +155,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
    has_variable_aoffi = TestVariableAoffi();
    has_component_indexing_bug = is_amd;
    has_precise_bug = TestPreciseBug();
-    has_broken_compute = is_intel;
+    has_broken_compute = is_intel_proprietary;
    has_fast_buffer_sub_data = is_nvidia;

    LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1028,6 +1028,10 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
        flip_y = !flip_y;
    }
    state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
+    state.clip_control.depth_mode =
+        regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne
+            ? GL_ZERO_TO_ONE
+            : GL_NEGATIVE_ONE_TO_ONE;
 }

 void RasterizerOpenGL::SyncClipEnabled(
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -49,8 +49,9 @@ class ExprDecompiler;
 enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };

 struct TextureAoffi {};
+struct TextureDerivates {};
 using TextureArgument = std::pair<Type, Node>;
-using TextureIR = std::variant<TextureAoffi, TextureArgument>;
+using TextureIR = std::variant<TextureAoffi, TextureDerivates, TextureArgument>;

 constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
    static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
@@ -1112,6 +1113,8 @@ private:
                expr += GenerateTextureArgument(*argument);
            } else if (std::holds_alternative<TextureAoffi>(variant)) {
                expr += GenerateTextureAoffi(meta->aoffi);
+            } else if (std::holds_alternative<TextureDerivates>(variant)) {
+                expr += GenerateTextureDerivates(meta->derivates);
            } else {
                UNREACHABLE();
            }
@@ -1181,6 +1184,36 @@ private:
        return expr;
    }

+    std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
+        if (derivates.empty()) {
+            return {};
+        }
+        constexpr std::array coord_constructors = {"float", "vec2", "vec3"};
+        std::string expr = ", ";
+        const std::size_t components = derivates.size() / 2;
+        std::string dx = coord_constructors.at(components - 1);
+        std::string dy = coord_constructors.at(components - 1);
+        dx += '(';
+        dy += '(';
+
+        for (std::size_t index = 0; index < components; ++index) {
+            const auto operand_x{derivates.at(index * 2)};
+            const auto operand_y{derivates.at(index * 2 + 1)};
+            dx += Visit(operand_x).AsFloat();
+            dy += Visit(operand_y).AsFloat();
+
+            if (index + 1 < components) {
+                dx += ", ";
+                dy += ", ";
+            }
+        }
+        dx += ')';
+        dy += ')';
+        expr += dx + ", " + dy;
+
+        return expr;
+    }
+
    std::string BuildIntegerCoordinates(Operation operation) {
        constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
        const std::size_t coords_count{operation.GetOperandsCount()};
@@ -1450,6 +1483,11 @@ private:
        return GenerateUnary(operation, "bitCount", type, type);
    }

+    template <Type type>
+    Expression BitMSB(Operation operation) {
+        return GenerateUnary(operation, "findMSB", type, type);
+    }
+
    Expression HNegate(Operation operation) {
        const auto GetNegate = [&](std::size_t index) {
            return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
@@ -1738,6 +1776,14 @@ private:
        return {std::move(expr), Type::Float};
    }

+    Expression TextureGradient(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
+
+        std::string expr = GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureAoffi{}});
+        return {std::move(expr) + GetSwizzle(meta->element), Type::Float};
+    }
+
    Expression ImageLoad(Operation operation) {
        if (!device.HasImageLoadFormatted()) {
            LOG_ERROR(Render_OpenGL,
@@ -1869,6 +1915,10 @@ private:
        return {};
    }

+    Expression InvocationId(Operation operation) {
+        return {"gl_InvocationID", Type::Int};
+    }
+
    Expression YNegate(Operation operation) {
        return {"y_direction", Type::Float};
    }
@@ -2003,6 +2053,7 @@ private:
        &GLSLDecompiler::BitfieldInsert<Type::Int>,
        &GLSLDecompiler::BitfieldExtract<Type::Int>,
        &GLSLDecompiler::BitCount<Type::Int>,
+        &GLSLDecompiler::BitMSB<Type::Int>,

        &GLSLDecompiler::Add<Type::Uint>,
        &GLSLDecompiler::Mul<Type::Uint>,
@@ -2021,6 +2072,7 @@ private:
        &GLSLDecompiler::BitfieldInsert<Type::Uint>,
        &GLSLDecompiler::BitfieldExtract<Type::Uint>,
        &GLSLDecompiler::BitCount<Type::Uint>,
+        &GLSLDecompiler::BitMSB<Type::Uint>,

        &GLSLDecompiler::Add<Type::HalfFloat>,
        &GLSLDecompiler::Mul<Type::HalfFloat>,
@@ -2084,6 +2136,7 @@ private:
        &GLSLDecompiler::TextureQueryDimensions,
        &GLSLDecompiler::TextureQueryLod,
        &GLSLDecompiler::TexelFetch,
+        &GLSLDecompiler::TextureGradient,

        &GLSLDecompiler::ImageLoad,
        &GLSLDecompiler::ImageStore,
@@ -2104,6 +2157,7 @@ private:
        &GLSLDecompiler::EmitVertex,
        &GLSLDecompiler::EndPrimitive,

+        &GLSLDecompiler::InvocationId,
        &GLSLDecompiler::YNegate,
        &GLSLDecompiler::LocalInvocationId<0>,
        &GLSLDecompiler::LocalInvocationId<1>,
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -411,8 +411,9 @@ void OpenGLState::ApplyAlphaTest() {
 }

 void OpenGLState::ApplyClipControl() {
-    if (UpdateValue(cur_state.clip_control.origin, clip_control.origin)) {
-        glClipControl(clip_control.origin, GL_NEGATIVE_ONE_TO_ONE);
+    if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode),
+                  std::tie(clip_control.origin, clip_control.depth_mode))) {
+        glClipControl(clip_control.origin, clip_control.depth_mode);
    }
 }

--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -150,6 +150,7 @@ public:

    struct {
        GLenum origin = GL_LOWER_LEFT;
+        GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE;
    } clip_control;

    OpenGLState();
--- a/src/video_core/renderer_vulkan/declarations.h
+++ b/src/video_core/renderer_vulkan/declarations.h
@@ -4,6 +4,17 @@

 #pragma once

+namespace vk {
+class DispatchLoaderDynamic;
+}
+
+namespace Vulkan {
+constexpr vk::DispatchLoaderDynamic* dont_use_me_dld = nullptr;
+}
+
+#define VULKAN_HPP_DEFAULT_DISPATCHER (*::Vulkan::dont_use_me_dld)
+#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0
+#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
 #include <vulkan/vulkan.hpp>

 namespace Vulkan {
@@ -41,5 +52,7 @@ using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
 using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
 using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
 using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
+using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>;
+using UniqueDebugUtilsMessengerEXT = UniqueHandle<vk::DebugUtilsMessengerEXT>;

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.

 #include <bitset>
+#include <cstdlib>
 #include <optional>
 #include <set>
 #include <string_view>
@@ -15,6 +16,15 @@ namespace Vulkan {

 namespace {

+namespace Alternatives {
+
+constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
+                                           vk::Format::eD16UnormS8Uint, vk::Format{}};
+constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
+                                           vk::Format::eD32SfloatS8Uint, vk::Format{}};
+
+} // namespace Alternatives
+
 template <typename T>
 void SetNext(void**& next, T& data) {
    *next = &data;
@@ -22,7 +32,7 @@ void SetNext(void**& next, T& data) {
 }

 template <typename T>
-T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) {
+T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
    vk::PhysicalDeviceFeatures2 features;
    T extension_features;
    features.pNext = &extension_features;
@@ -30,17 +40,14 @@ T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) {
    return extension_features;
 }

-} // Anonymous namespace
-
-namespace Alternatives {
-
-constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
-                                           vk::Format::eD16UnormS8Uint, vk::Format{}};
-constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
-                                           vk::Format::eD32SfloatS8Uint, vk::Format{}};
-constexpr std::array Astc = {vk::Format::eA8B8G8R8UnormPack32, vk::Format{}};
-
-} // namespace Alternatives
+template <typename T>
+T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
+    vk::PhysicalDeviceProperties2 properties;
+    T extension_properties;
+    properties.pNext = &extension_properties;
+    physical.getProperties2(&properties, dldi);
+    return extension_properties;
+}

 constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
    switch (format) {
@@ -53,8 +60,7 @@ constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
    }
 }

-constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties,
-                                                   FormatType format_type) {
+vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, FormatType format_type) {
    switch (format_type) {
    case FormatType::Linear:
        return properties.linearTilingFeatures;
@@ -67,11 +73,13 @@ constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properti
    }
 }

+} // Anonymous namespace
+
 VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                   vk::SurfaceKHR surface)
-    : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
+    : physical{physical}, properties{physical.getProperties(dldi)},
+      format_properties{GetFormatProperties(dldi, physical)} {
    SetupFamilies(dldi, surface);
-    SetupProperties(dldi);
    SetupFeatures(dldi);
 }

@@ -89,12 +97,22 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
    features.depthClamp = true;
    features.samplerAnisotropy = true;
    features.largePoints = true;
+    features.multiViewport = true;
+    features.depthBiasClamp = true;
+    features.geometryShader = true;
+    features.tessellationShader = true;
+    features.fragmentStoresAndAtomics = true;
+    features.shaderImageGatherExtended = true;
+    features.shaderStorageImageWriteWithoutFormat = true;
    features.textureCompressionASTC_LDR = is_optimal_astc_supported;

-    vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor;
-    vertex_divisor.vertexAttributeInstanceRateDivisor = true;
-    vertex_divisor.vertexAttributeInstanceRateZeroDivisor = true;
-    SetNext(next, vertex_divisor);
+    vk::PhysicalDevice16BitStorageFeaturesKHR bit16_storage;
+    bit16_storage.uniformAndStorageBuffer16BitAccess = true;
+    SetNext(next, bit16_storage);
+
+    vk::PhysicalDevice8BitStorageFeaturesKHR bit8_storage;
+    bit8_storage.uniformAndStorageBuffer8BitAccess = true;
+    SetNext(next, bit8_storage);

    vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
    if (is_float16_supported) {
@@ -120,6 +138,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
        LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
    }

+    if (!ext_depth_range_unrestricted) {
+        LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
+    }
+
    vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0,
                                   nullptr, static_cast<u32>(extensions.size()), extensions.data(),
                                   nullptr);
@@ -135,16 +157,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
    logical = UniqueDevice(
        dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));

-    if (khr_driver_properties) {
-        vk::PhysicalDeviceDriverPropertiesKHR driver;
-        vk::PhysicalDeviceProperties2 properties;
-        properties.pNext = &driver;
-        physical.getProperties2(&properties, dld);
-        driver_id = driver.driverID;
-        LOG_INFO(Render_Vulkan, "Driver: {} {}", driver.driverName, driver.driverInfo);
-    } else {
-        LOG_INFO(Render_Vulkan, "Driver: Unknown");
-    }
+    CollectTelemetryParameters();

    graphics_queue = logical->getQueue(graphics_family, 0, dld);
    present_queue = logical->getQueue(present_family, 0, dld);
@@ -190,6 +203,18 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,

 bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
                                      const vk::DispatchLoaderDynamic& dldi) const {
+    // Disable for now to avoid converting ASTC twice.
+    return false;
+    static constexpr std::array astc_formats = {
+        vk::Format::eAstc4x4SrgbBlock,    vk::Format::eAstc8x8SrgbBlock,
+        vk::Format::eAstc8x5SrgbBlock,    vk::Format::eAstc5x4SrgbBlock,
+        vk::Format::eAstc5x5UnormBlock,   vk::Format::eAstc5x5SrgbBlock,
+        vk::Format::eAstc10x8UnormBlock,  vk::Format::eAstc10x8SrgbBlock,
+        vk::Format::eAstc6x6UnormBlock,   vk::Format::eAstc6x6SrgbBlock,
+        vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock,
+        vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock,
+        vk::Format::eAstc8x6UnormBlock,   vk::Format::eAstc8x6SrgbBlock,
+        vk::Format::eAstc6x5UnormBlock,   vk::Format::eAstc6x5SrgbBlock};
    if (!features.textureCompressionASTC_LDR) {
        return false;
    }
@@ -197,12 +222,6 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
        vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc |
        vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
        vk::FormatFeatureFlagBits::eTransferDst};
-    constexpr std::array astc_formats = {
-        vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
-        vk::Format::eAstc8x8SrgbBlock,  vk::Format::eAstc8x6SrgbBlock,
-        vk::Format::eAstc5x4SrgbBlock,  vk::Format::eAstc5x5UnormBlock,
-        vk::Format::eAstc5x5SrgbBlock,  vk::Format::eAstc10x8UnormBlock,
-        vk::Format::eAstc10x8SrgbBlock};
    for (const auto format : astc_formats) {
        const auto format_properties{physical.getFormatProperties(format, dldi)};
        if (!(format_properties.optimalTilingFeatures & format_feature_usage)) {
@@ -225,11 +244,17 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag

 bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                          vk::SurfaceKHR surface) {
-    LOG_INFO(Render_Vulkan, "{}", physical.getProperties(dldi).deviceName);
    bool is_suitable = true;

-    constexpr std::array required_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME,
-                                                VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME};
+    constexpr std::array required_extensions = {
+        VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+        VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
+        VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
+        VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
+        VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
+        VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
+        VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
+    };
    std::bitset<required_extensions.size()> available_extensions{};

    for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
@@ -246,7 +271,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
            if (available_extensions[i]) {
                continue;
            }
-            LOG_INFO(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
+            LOG_ERROR(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
            is_suitable = false;
        }
    }
@@ -263,7 +288,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
        has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
    }
    if (!has_graphics || !has_present) {
-        LOG_INFO(Render_Vulkan, "Device lacks a graphics and present queue");
+        LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
        is_suitable = false;
    }

@@ -273,8 +298,15 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev

    constexpr u32 required_ubo_size = 65536;
    if (limits.maxUniformBufferRange < required_ubo_size) {
-        LOG_INFO(Render_Vulkan, "Device UBO size {} is too small, {} is required)",
-                 limits.maxUniformBufferRange, required_ubo_size);
+        LOG_ERROR(Render_Vulkan, "Device UBO size {} is too small, {} is required",
+                  limits.maxUniformBufferRange, required_ubo_size);
+        is_suitable = false;
+    }
+
+    constexpr u32 required_num_viewports = 16;
+    if (limits.maxViewports < required_num_viewports) {
+        LOG_INFO(Render_Vulkan, "Device number of viewports {} is too small, {} is required",
+                 limits.maxViewports, required_num_viewports);
        is_suitable = false;
    }

@@ -285,24 +317,32 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
        std::make_pair(features.depthClamp, "depthClamp"),
        std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
        std::make_pair(features.largePoints, "largePoints"),
+        std::make_pair(features.multiViewport, "multiViewport"),
+        std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
+        std::make_pair(features.geometryShader, "geometryShader"),
+        std::make_pair(features.tessellationShader, "tessellationShader"),
+        std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
+        std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
+        std::make_pair(features.shaderStorageImageWriteWithoutFormat,
+                       "shaderStorageImageWriteWithoutFormat"),
    };
    for (const auto& [supported, name] : feature_report) {
        if (supported) {
            continue;
        }
-        LOG_INFO(Render_Vulkan, "Missing required feature: {}", name);
+        LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name);
        is_suitable = false;
    }

+    if (!is_suitable) {
+        LOG_ERROR(Render_Vulkan, "{} is not suitable", properties.deviceName);
+    }
+
    return is_suitable;
 }

 std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
    std::vector<const char*> extensions;
-    extensions.reserve(7);
-    extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
-    extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
-
    const auto Test = [&](const vk::ExtensionProperties& extension,
                          std::optional<std::reference_wrapper<bool>> status, const char* name,
                          bool push) {
@@ -317,13 +357,30 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
        }
    };

+    extensions.reserve(13);
+    extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+    extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
+    extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
+    extensions.push_back(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME);
+    extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
+    extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
+    extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
+
+    [[maybe_unused]] const bool nsight =
+        std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
    bool khr_shader_float16_int8{};
+    bool ext_subgroup_size_control{};
    for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
        Test(extension, khr_uniform_buffer_standard_layout,
             VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
-        Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
-        Test(extension, khr_driver_properties, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, true);
        Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
+        Test(extension, ext_depth_range_unrestricted,
+             VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
+        Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
+        Test(extension, ext_shader_viewport_index_layer,
+             VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
+        Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
+             false);
    }

    if (khr_shader_float16_int8) {
@@ -332,6 +389,23 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
        extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
    }

+    if (ext_subgroup_size_control) {
+        const auto features =
+            GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
+        const auto properties =
+            GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi);
+
+        is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize;
+
+        if (features.subgroupSizeControl && properties.minSubgroupSize <= GuestWarpSize &&
+            properties.maxSubgroupSize >= GuestWarpSize) {
+            extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
+            guest_warp_stages = properties.requiredSubgroupSizeStages;
+        }
+    } else {
+        is_warp_potentially_bigger = true;
+    }
+
    return extensions;
 }

@@ -358,19 +432,23 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
    present_family = *present_family_;
 }

-void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
-    const auto props = physical.getProperties(dldi);
-    device_type = props.deviceType;
-    uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
-    storage_buffer_alignment = static_cast<u64>(props.limits.minStorageBufferOffsetAlignment);
-    max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange);
-}
-
 void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
    const auto supported_features{physical.getFeatures(dldi)};
    is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
 }

+void VKDevice::CollectTelemetryParameters() {
+    const auto driver = GetProperties<vk::PhysicalDeviceDriverPropertiesKHR>(physical, dld);
+    driver_id = driver.driverID;
+    vendor_name = driver.driverName;
+
+    const auto extensions = physical.enumerateDeviceExtensionProperties(nullptr, dld);
+    reported_extensions.reserve(std::size(extensions));
+    for (const auto& extension : extensions) {
+        reported_extensions.push_back(extension.extensionName);
+    }
+}
+
 std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
    static const float QUEUE_PRIORITY = 1.0f;

@@ -385,50 +463,70 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con

 std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
    const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
-    constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
-                                 vk::Format::eA8B8G8R8SnormPack32,
-                                 vk::Format::eA8B8G8R8SrgbPack32,
-                                 vk::Format::eB5G6R5UnormPack16,
-                                 vk::Format::eA2B10G10R10UnormPack32,
-                                 vk::Format::eR32G32B32A32Sfloat,
-                                 vk::Format::eR16G16B16A16Uint,
-                                 vk::Format::eR16G16Unorm,
-                                 vk::Format::eR16G16Snorm,
-                                 vk::Format::eR16G16Sfloat,
-                                 vk::Format::eR16Unorm,
-                                 vk::Format::eR8G8B8A8Srgb,
-                                 vk::Format::eR8G8Unorm,
-                                 vk::Format::eR8G8Snorm,
-                                 vk::Format::eR8Unorm,
-                                 vk::Format::eB10G11R11UfloatPack32,
-                                 vk::Format::eR32Sfloat,
-                                 vk::Format::eR16Sfloat,
-                                 vk::Format::eR16G16B16A16Sfloat,
-                                 vk::Format::eB8G8R8A8Unorm,
-                                 vk::Format::eD32Sfloat,
-                                 vk::Format::eD16Unorm,
-                                 vk::Format::eD16UnormS8Uint,
-                                 vk::Format::eD24UnormS8Uint,
-                                 vk::Format::eD32SfloatS8Uint,
-                                 vk::Format::eBc1RgbaUnormBlock,
-                                 vk::Format::eBc2UnormBlock,
-                                 vk::Format::eBc3UnormBlock,
-                                 vk::Format::eBc4UnormBlock,
-                                 vk::Format::eBc5UnormBlock,
-                                 vk::Format::eBc5SnormBlock,
-                                 vk::Format::eBc7UnormBlock,
-                                 vk::Format::eBc1RgbaSrgbBlock,
-                                 vk::Format::eBc3SrgbBlock,
-                                 vk::Format::eBc7SrgbBlock,
-                                 vk::Format::eAstc4x4UnormBlock,
-                                 vk::Format::eAstc4x4SrgbBlock,
-                                 vk::Format::eAstc8x8SrgbBlock,
-                                 vk::Format::eAstc8x6SrgbBlock,
-                                 vk::Format::eAstc5x4SrgbBlock,
-                                 vk::Format::eAstc5x5UnormBlock,
-                                 vk::Format::eAstc5x5SrgbBlock,
-                                 vk::Format::eAstc10x8UnormBlock,
-                                 vk::Format::eAstc10x8SrgbBlock};
+    static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
+                                        vk::Format::eA8B8G8R8SnormPack32,
+                                        vk::Format::eA8B8G8R8SrgbPack32,
+                                        vk::Format::eB5G6R5UnormPack16,
+                                        vk::Format::eA2B10G10R10UnormPack32,
+                                        vk::Format::eA1R5G5B5UnormPack16,
+                                        vk::Format::eR32G32B32A32Sfloat,
+                                        vk::Format::eR32G32B32A32Uint,
+                                        vk::Format::eR32G32Sfloat,
+                                        vk::Format::eR32G32Uint,
+                                        vk::Format::eR16G16B16A16Uint,
+                                        vk::Format::eR16G16B16A16Unorm,
+                                        vk::Format::eR16G16Unorm,
+                                        vk::Format::eR16G16Snorm,
+                                        vk::Format::eR16G16Sfloat,
+                                        vk::Format::eR16Unorm,
+                                        vk::Format::eR8G8B8A8Srgb,
+                                        vk::Format::eR8G8Unorm,
+                                        vk::Format::eR8G8Snorm,
+                                        vk::Format::eR8Unorm,
+                                        vk::Format::eR8Uint,
+                                        vk::Format::eB10G11R11UfloatPack32,
+                                        vk::Format::eR32Sfloat,
+                                        vk::Format::eR32Uint,
+                                        vk::Format::eR16Sfloat,
+                                        vk::Format::eR16G16B16A16Sfloat,
+                                        vk::Format::eB8G8R8A8Unorm,
+                                        vk::Format::eR4G4B4A4UnormPack16,
+                                        vk::Format::eD32Sfloat,
+                                        vk::Format::eD16Unorm,
+                                        vk::Format::eD16UnormS8Uint,
+                                        vk::Format::eD24UnormS8Uint,
+                                        vk::Format::eD32SfloatS8Uint,
+                                        vk::Format::eBc1RgbaUnormBlock,
+                                        vk::Format::eBc2UnormBlock,
+                                        vk::Format::eBc3UnormBlock,
+                                        vk::Format::eBc4UnormBlock,
+                                        vk::Format::eBc5UnormBlock,
+                                        vk::Format::eBc5SnormBlock,
+                                        vk::Format::eBc7UnormBlock,
+                                        vk::Format::eBc6HUfloatBlock,
+                                        vk::Format::eBc6HSfloatBlock,
+                                        vk::Format::eBc1RgbaSrgbBlock,
+                                        vk::Format::eBc3SrgbBlock,
+                                        vk::Format::eBc7SrgbBlock,
+                                        vk::Format::eAstc4x4SrgbBlock,
+                                        vk::Format::eAstc8x8SrgbBlock,
+                                        vk::Format::eAstc8x5SrgbBlock,
+                                        vk::Format::eAstc5x4SrgbBlock,
+                                        vk::Format::eAstc5x5UnormBlock,
+                                        vk::Format::eAstc5x5SrgbBlock,
+                                        vk::Format::eAstc10x8UnormBlock,
+                                        vk::Format::eAstc10x8SrgbBlock,
+                                        vk::Format::eAstc6x6UnormBlock,
+                                        vk::Format::eAstc6x6SrgbBlock,
+                                        vk::Format::eAstc10x10UnormBlock,
+                                        vk::Format::eAstc10x10SrgbBlock,
+                                        vk::Format::eAstc12x12UnormBlock,
+                                        vk::Format::eAstc12x12SrgbBlock,
+                                        vk::Format::eAstc8x6UnormBlock,
+                                        vk::Format::eAstc8x6SrgbBlock,
+                                        vk::Format::eAstc6x5UnormBlock,
+                                        vk::Format::eAstc6x5SrgbBlock,
+                                        vk::Format::eE5B9G9R9UfloatPack32};
    std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
    for (const auto format : formats) {
        format_properties.emplace(format, physical.getFormatProperties(format, dldi));
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -4,6 +4,8 @@

 #pragma once

+#include <string>
+#include <string_view>
 #include <unordered_map>
 #include <vector>
 #include "common/common_types.h"
@@ -14,6 +16,9 @@ namespace Vulkan {
 /// Format usage descriptor.
 enum class FormatType { Linear, Optimal, Buffer };

+/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
+const u32 GuestWarpSize = 32;
+
 /// Handles data specific to a physical device.
 class VKDevice final {
 public:
@@ -71,7 +76,22 @@ public:

    /// Returns true if the device is integrated with the host CPU.
    bool IsIntegrated() const {
-        return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
+        return properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
+    }
+
+    /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
+    u32 GetApiVersion() const {
+        return properties.apiVersion;
+    }
+
+    /// Returns the current driver version provided in Vulkan-formatted version numbers.
+    u32 GetDriverVersion() const {
+        return properties.driverVersion;
+    }
+
+    /// Returns the device name.
+    std::string_view GetModelName() const {
+        return properties.deviceName;
    }

    /// Returns the driver ID.
@@ -80,18 +100,23 @@ public:
    }

    /// Returns uniform buffer alignment requeriment.
-    u64 GetUniformBufferAlignment() const {
-        return uniform_buffer_alignment;
+    vk::DeviceSize GetUniformBufferAlignment() const {
+        return properties.limits.minUniformBufferOffsetAlignment;
    }

    /// Returns storage alignment requeriment.
-    u64 GetStorageBufferAlignment() const {
-        return storage_buffer_alignment;
+    vk::DeviceSize GetStorageBufferAlignment() const {
+        return properties.limits.minStorageBufferOffsetAlignment;
    }

    /// Returns the maximum range for storage buffers.
-    u64 GetMaxStorageBufferRange() const {
-        return max_storage_buffer_range;
+    vk::DeviceSize GetMaxStorageBufferRange() const {
+        return properties.limits.maxStorageBufferRange;
+    }
+
+    /// Returns the maximum size for push constants.
+    vk::DeviceSize GetMaxPushConstantsSize() const {
+        return properties.limits.maxPushConstantsSize;
    }

    /// Returns true if ASTC is natively supported.
@@ -104,6 +129,16 @@ public:
        return is_float16_supported;
    }

+    /// Returns true if the device warp size can potentially be bigger than guest's warp size.
+    bool IsWarpSizePotentiallyBiggerThanGuest() const {
+        return is_warp_potentially_bigger;
+    }
+
+    /// Returns true if the device can be forced to use the guest warp size.
+    bool IsGuestWarpSizeSupported(vk::ShaderStageFlagBits stage) const {
+        return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
+    }
+
    /// Returns true if the device supports VK_EXT_scalar_block_layout.
    bool IsKhrUniformBufferStandardLayoutSupported() const {
        return khr_uniform_buffer_standard_layout;
@@ -114,6 +149,26 @@ public:
        return ext_index_type_uint8;
    }

+    /// Returns true if the device supports VK_EXT_depth_range_unrestricted.
+    bool IsExtDepthRangeUnrestrictedSupported() const {
+        return ext_depth_range_unrestricted;
+    }
+
+    /// Returns true if the device supports VK_EXT_shader_viewport_index_layer.
+    bool IsExtShaderViewportIndexLayerSupported() const {
+        return ext_shader_viewport_index_layer;
+    }
+
+    /// Returns the vendor name reported from Vulkan.
+    std::string_view GetVendorName() const {
+        return vendor_name;
+    }
+
+    /// Returns the list of available extensions.
+    const std::vector<std::string>& GetAvailableExtensions() const {
+        return reported_extensions;
+    }
+
    /// Checks if the physical device is suitable.
    static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                           vk::SurfaceKHR surface);
@@ -125,12 +180,12 @@ private:
    /// Sets up queue families.
    void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);

-    /// Sets up device properties.
-    void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
-
    /// Sets up device features.
    void SetupFeatures(const vk::DispatchLoaderDynamic& dldi);

+    /// Collects telemetry information from the device.
+    void CollectTelemetryParameters();
+
    /// Returns a list of queue initialization descriptors.
    std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;

@@ -148,23 +203,28 @@ private:

    const vk::PhysicalDevice physical;         ///< Physical device.
    vk::DispatchLoaderDynamic dld;             ///< Device function pointers.
+    vk::PhysicalDeviceProperties properties;   ///< Device properties.
    UniqueDevice logical;                      ///< Logical device.
    vk::Queue graphics_queue;                  ///< Main graphics queue.
    vk::Queue present_queue;                   ///< Main present queue.
    u32 graphics_family{};                     ///< Main graphics queue family index.
    u32 present_family{};                      ///< Main present queue family index.
-    vk::PhysicalDeviceType device_type;        ///< Physical device type.
    vk::DriverIdKHR driver_id{};               ///< Driver ID.
-    u64 uniform_buffer_alignment{};            ///< Uniform buffer alignment requeriment.
-    u64 storage_buffer_alignment{};            ///< Storage buffer alignment requeriment.
-    u64 max_storage_buffer_range{};            ///< Max storage buffer size.
+    vk::ShaderStageFlags guest_warp_stages{};  ///< Stages where the guest warp size can be forced.
    bool is_optimal_astc_supported{};          ///< Support for native ASTC.
    bool is_float16_supported{};               ///< Support for float16 arithmetics.
+    bool is_warp_potentially_bigger{};         ///< Host warp size can be bigger than guest.
    bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
    bool ext_index_type_uint8{};               ///< Support for VK_EXT_index_type_uint8.
-    bool khr_driver_properties{};              ///< Support for VK_KHR_driver_properties.
-    std::unordered_map<vk::Format, vk::FormatProperties>
-        format_properties; ///< Format properties dictionary.
+    bool ext_depth_range_unrestricted{};       ///< Support for VK_EXT_depth_range_unrestricted.
+    bool ext_shader_viewport_index_layer{};    ///< Support for VK_EXT_shader_viewport_index_layer.
+
+    // Telemetry parameters
+    std::string vendor_name;                      ///< Device's driver name.
+    std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
+
+    /// Format properties dictionary.
+    std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
 };

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -5,29 +5,28 @@
 #pragma once

 #include <array>
+#include <bitset>
 #include <memory>
 #include <set>
+#include <type_traits>
 #include <utility>
 #include <vector>

-#include <sirit/sirit.h>
-
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
 #include "video_core/shader/shader_ir.h"

-namespace VideoCommon::Shader {
-class ShaderIR;
-}
-
 namespace Vulkan {
 class VKDevice;
 }

-namespace Vulkan::VKShader {
+namespace Vulkan {

 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using TexelBufferEntry = VideoCommon::Shader::Sampler;
 using SamplerEntry = VideoCommon::Shader::Sampler;
+using ImageEntry = VideoCommon::Shader::Image;

 constexpr u32 DESCRIPTOR_SET = 0;

@@ -46,39 +45,74 @@ private:

 class GlobalBufferEntry {
 public:
-    explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset)
-        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
+    constexpr explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset, bool is_written)
+        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_written{is_written} {}

-    u32 GetCbufIndex() const {
+    constexpr u32 GetCbufIndex() const {
        return cbuf_index;
    }

-    u32 GetCbufOffset() const {
+    constexpr u32 GetCbufOffset() const {
        return cbuf_offset;
    }

+    constexpr bool IsWritten() const {
+        return is_written;
+    }
+
 private:
    u32 cbuf_index{};
    u32 cbuf_offset{};
+    bool is_written{};
 };

 struct ShaderEntries {
-    u32 const_buffers_base_binding{};
-    u32 global_buffers_base_binding{};
-    u32 samplers_base_binding{};
+    u32 NumBindings() const {
+        return static_cast<u32>(const_buffers.size() + global_buffers.size() +
+                                texel_buffers.size() + samplers.size() + images.size());
+    }
+
    std::vector<ConstBufferEntry> const_buffers;
    std::vector<GlobalBufferEntry> global_buffers;
+    std::vector<TexelBufferEntry> texel_buffers;
    std::vector<SamplerEntry> samplers;
+    std::vector<ImageEntry> images;
    std::set<u32> attributes;
    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
    std::size_t shader_length{};
-    Sirit::Id entry_function{};
-    std::vector<Sirit::Id> interfaces;
+    bool uses_warps{};
 };

-using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
+struct Specialization final {
+    u32 base_binding{};

-DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
-                           Tegra::Engines::ShaderType stage);
+    // Compute specific
+    std::array<u32, 3> workgroup_size{};
+    u32 shared_memory_size{};

-} // namespace Vulkan::VKShader
+    // Graphics specific
+    Maxwell::PrimitiveTopology primitive_topology{};
+    std::optional<float> point_size{};
+    std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
+
+    // Tessellation specific
+    struct {
+        Maxwell::TessellationPrimitive primitive{};
+        Maxwell::TessellationSpacing spacing{};
+        bool clockwise{};
+    } tessellation;
+};
+// Old gcc versions don't consider this trivially copyable.
+// static_assert(std::is_trivially_copyable_v<Specialization>);
+
+struct SPIRVShader {
+    std::vector<u32> code;
+    ShaderEntries entries;
+};
+
+ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
+
+std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
+                           Tegra::Engines::ShaderType stage, const Specialization& specialization);
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -19,12 +19,18 @@
 namespace Vulkan {

 namespace {
-vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats) {
+
+vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats,
+                                             bool srgb) {
    if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
-        return {vk::Format::eB8G8R8A8Unorm, vk::ColorSpaceKHR::eSrgbNonlinear};
+        vk::SurfaceFormatKHR format;
+        format.format = vk::Format::eB8G8R8A8Unorm;
+        format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear;
+        return format;
    }
-    const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
-        return format.format == vk::Format::eB8G8R8A8Unorm &&
+    const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) {
+        const auto request_format = srgb ? vk::Format::eB8G8R8A8Srgb : vk::Format::eB8G8R8A8Unorm;
+        return format.format == request_format &&
               format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear;
    });
    return found != formats.end() ? *found : formats[0];
@@ -51,28 +57,26 @@ vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u3
                             std::min(capabilities.maxImageExtent.height, extent.height));
    return extent;
 }
-} // namespace
+
+} // Anonymous namespace

 VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device)
    : surface{surface}, device{device} {}

 VKSwapchain::~VKSwapchain() = default;

-void VKSwapchain::Create(u32 width, u32 height) {
-    const auto dev = device.GetLogical();
+void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
    const auto& dld = device.GetDispatchLoader();
    const auto physical_device = device.GetPhysical();
-
-    const vk::SurfaceCapabilitiesKHR capabilities{
-        physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
+    const auto capabilities{physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
    if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
        return;
    }

-    dev.waitIdle(dld);
+    device.GetLogical().waitIdle(dld);
    Destroy();

-    CreateSwapchain(capabilities, width, height);
+    CreateSwapchain(capabilities, width, height, srgb);
    CreateSemaphores();
    CreateImageViews();

@@ -107,7 +111,7 @@ bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) {
        break;
    case vk::Result::eErrorOutOfDateKHR:
        if (current_width > 0 && current_height > 0) {
-            Create(current_width, current_height);
+            Create(current_width, current_height, current_srgb);
            recreated = true;
        }
        break;
@@ -129,23 +133,19 @@ bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebu
 }

 void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
-                                  u32 height) {
-    const auto dev{device.GetLogical()};
+                                  u32 height, bool srgb) {
    const auto& dld{device.GetDispatchLoader()};
    const auto physical_device{device.GetPhysical()};
+    const auto formats{physical_device.getSurfaceFormatsKHR(surface, dld)};
+    const auto present_modes{physical_device.getSurfacePresentModesKHR(surface, dld)};

-    const std::vector<vk::SurfaceFormatKHR> formats{
-        physical_device.getSurfaceFormatsKHR(surface, dld)};
-
-    const std::vector<vk::PresentModeKHR> present_modes{
-        physical_device.getSurfacePresentModesKHR(surface, dld)};
-
-    const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
+    const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
    const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
    extent = ChooseSwapExtent(capabilities, width, height);

    current_width = extent.width;
    current_height = extent.height;
+    current_srgb = srgb;

    u32 requested_image_count{capabilities.minImageCount + 1};
    if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
@@ -169,6 +169,7 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
        swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
    }

+    const auto dev{device.GetLogical()};
    swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);

    images = dev.getSwapchainImagesKHR(*swapchain, dld);
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -24,7 +24,7 @@ public:
    ~VKSwapchain();

    /// Creates (or recreates) the swapchain with a given size.
-    void Create(u32 width, u32 height);
+    void Create(u32 width, u32 height, bool srgb);

    /// Acquires the next image in the swapchain, waits as needed.
    void AcquireNextImage();
@@ -60,8 +60,13 @@ public:
        return image_format;
    }

+    bool GetSrgbState() const {
+        return current_srgb;
+    }
+
 private:
-    void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height);
+    void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
+                         bool srgb);
    void CreateSemaphores();
    void CreateImageViews();

@@ -87,6 +92,7 @@ private:

    u32 current_width{};
    u32 current_height{};
+    bool current_srgb{};
 };

 } // namespace Vulkan
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -130,6 +130,25 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
        SetRegister(bb, instr.gpr0, value);
        break;
    }
+    case OpCode::Id::FLO_R:
+    case OpCode::Id::FLO_C:
+    case OpCode::Id::FLO_IMM: {
+        Node value;
+        if (instr.flo.invert) {
+            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
+        }
+        if (instr.flo.is_signed) {
+            value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
+        } else {
+            value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
+        }
+        if (instr.flo.sh) {
+            value =
+                Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
+        }
+        SetRegister(bb, instr.gpr0, std::move(value));
+        break;
+    }
    case OpCode::Id::SEL_C:
    case OpCode::Id::SEL_R:
    case OpCode::Id::SEL_IMM: {
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -21,6 +21,7 @@ using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;

 namespace {
+
 u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
    case Tegra::Shader::UniformType::Single:
@@ -35,6 +36,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
        return 1;
    }
 }
+
 } // Anonymous namespace

 u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@@ -196,28 +198,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");

-        u64 next_element = instr.attribute.fmt20.element;
-        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+        u64 element = instr.attribute.fmt20.element;
+        auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());

-        const auto StoreNextElement = [&](u32 reg_offset) {
-            const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
-                                                 next_element, GetRegister(instr.gpr39));
+        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+            Node dest;
+            if (instr.attribute.fmt20.patch) {
+                const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
+                dest = MakeNode<PatchNode>(offset);
+            } else {
+                dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
+                                          GetRegister(instr.gpr39));
+            }
            const auto src = GetRegister(instr.gpr0.Value() + reg_offset);

            bb.push_back(Operation(OperationCode::Assign, dest, src));

-            // Load the next attribute element into the following register. If the element
-            // to load goes beyond the vec4 size, load the first element of the next
-            // attribute.
-            next_element = (next_element + 1) % 4;
-            next_index = next_index + (next_element == 0 ? 1 : 0);
-        };
-
-        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
-        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
-            StoreNextElement(reg_offset);
+            // Load the next attribute element into the following register. If the element to load
+            // goes beyond the vec4 size, load the first element of the next attribute.
+            element = (element + 1) % 4;
+            index = index + (element == 0 ? 1 : 0);
        }
-
        break;
    }
    case OpCode::Id::ST_L:
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -69,6 +69,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    case OpCode::Id::MOV_SYS: {
        const Node value = [this, instr] {
            switch (instr.sys20) {
+            case SystemVariable::InvocationId:
+                return Operation(OperationCode::InvocationId);
            case SystemVariable::Ydirection:
                return Operation(OperationCode::YNegate);
            case SystemVariable::InvocationInfo:
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -134,13 +134,52 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
        Node4 values;
        for (u32 element = 0; element < values.size(); ++element) {
            auto coords_copy = coords;
-            MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
+            MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, component, element};
            values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
        }

        WriteTexsInstructionFloat(bb, instr, values, true);
        break;
    }
+    case OpCode::Id::TXD_B:
+        is_bindless = true;
+        [[fallthrough]];
+    case OpCode::Id::TXD: {
+        UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.txd.is_array != 0, "TXD Array is not implemented");
+
+        u64 base_reg = instr.gpr8.Value();
+        const auto derivate_reg = instr.gpr20.Value();
+        const auto texture_type = instr.txd.texture_type.Value();
+        const auto coord_count = GetCoordCount(texture_type);
+
+        const auto& sampler = is_bindless
+                                  ? GetBindlessSampler(base_reg, {{texture_type, false, false}})
+                                  : GetSampler(instr.sampler, {{texture_type, false, false}});
+        if (is_bindless) {
+            base_reg++;
+        }
+
+        std::vector<Node> coords;
+        std::vector<Node> derivates;
+        for (std::size_t i = 0; i < coord_count; ++i) {
+            coords.push_back(GetRegister(base_reg + i));
+            const std::size_t derivate = i * 2;
+            derivates.push_back(GetRegister(derivate_reg + derivate));
+            derivates.push_back(GetRegister(derivate_reg + derivate + 1));
+        }
+
+        Node4 values;
+        for (u32 element = 0; element < values.size(); ++element) {
+            MetaTexture meta{sampler, {}, {}, {}, derivates, {}, {}, {}, element};
+            values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
+        }
+
+        WriteTexInstructionFloat(bb, instr, values);
+
+        break;
+    }
    case OpCode::Id::TXQ_B:
        is_bindless = true;
        [[fallthrough]];
@@ -158,7 +197,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                if (!instr.txq.IsComponentEnabled(element)) {
                    continue;
                }
-                MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
+                MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element};
                const Node value =
                    Operation(OperationCode::TextureQueryDimensions, meta,
                              GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
@@ -212,7 +251,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                continue;
            }
            auto params = coords;
-            MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
+            MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element};
            const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
            SetTemporary(bb, indexer++, value);
        }
@@ -442,7 +481,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto copy_coords = coords;
-        MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
+        MetaTexture meta{sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element};
        values[element] = Operation(read_method, meta, std::move(copy_coords));
    }

@@ -574,7 +613,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
-        MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component,
+        MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component,
                         element};
        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
    }
@@ -608,7 +647,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
-        MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element};
+        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, lod, {}, element};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }

@@ -653,7 +692,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
-        MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
+        MetaTexture meta{sampler, array, {}, {}, {}, {}, lod, {}, element};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }
    return values;
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -38,6 +38,9 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

+    // Signal the backend that this shader uses warp instructions.
+    uses_warps = true;
+
    switch (opcode->get().GetId()) {
    case OpCode::Id::VOTE: {
        const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -68,6 +68,7 @@ enum class OperationCode {
    IBitfieldInsert,       /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
    IBitfieldExtract,      /// (MetaArithmetic, int value, int offset, int offset) -> int
    IBitCount,             /// (MetaArithmetic, int) -> int
+    IBitMSB,               /// (MetaArithmetic, int) -> int

    UAdd,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMul,                  /// (MetaArithmetic, uint a, uint b) -> uint
@@ -86,6 +87,7 @@ enum class OperationCode {
    UBitfieldInsert,  /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
    UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
    UBitCount,        /// (MetaArithmetic, uint) -> uint
+    UBitMSB,          /// (MetaArithmetic, uint) -> uint

    HAdd,       /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HMul,       /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
@@ -149,6 +151,7 @@ enum class OperationCode {
    TextureQueryDimensions, /// (MetaTexture, float a) -> float4
    TextureQueryLod,        /// (MetaTexture, float[N] coords) -> float4
    TexelFetch,             /// (MetaTexture, int[N], int) -> float4
+    TextureGradient,        /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4

    ImageLoad,  /// (MetaImage, int[N] coords) -> void
    ImageStore, /// (MetaImage, int[N] coords) -> void
@@ -169,6 +172,7 @@ enum class OperationCode {
    EmitVertex,   /// () -> void
    EndPrimitive, /// () -> void

+    InvocationId,       /// () -> int
    YNegate,            /// () -> float
    LocalInvocationIdX, /// () -> uint
    LocalInvocationIdY, /// () -> uint
@@ -210,13 +214,14 @@ class PredicateNode;
 class AbufNode;
 class CbufNode;
 class LmemNode;
+class PatchNode;
 class SmemNode;
 class GmemNode;
 class CommentNode;

-using NodeData =
-    std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
-                 PredicateNode, AbufNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>;
+using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode,
+                              InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
+                              LmemNode, SmemNode, GmemNode, CommentNode>;
 using Node = std::shared_ptr<NodeData>;
 using Node4 = std::array<Node, 4>;
 using NodeBlock = std::vector<Node>;
@@ -367,6 +372,7 @@ struct MetaTexture {
    Node array;
    Node depth_compare;
    std::vector<Node> aoffi;
+    std::vector<Node> derivates;
    Node bias;
    Node lod;
    Node component{};
@@ -538,6 +544,19 @@ private:
    u32 element{};
 };

+/// Patch memory (used to communicate tessellation stages).
+class PatchNode final {
+public:
+    explicit PatchNode(u32 offset) : offset{offset} {}
+
+    u32 GetOffset() const {
+        return offset;
+    }
+
+private:
+    u32 offset{};
+};
+
 /// Constant buffer node, usually mapped to uniform buffers in GLSL
 class CbufNode final {
 public:
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -137,6 +137,10 @@ public:
        return uses_vertex_id;
    }

+    bool UsesWarps() const {
+        return uses_warps;
+    }
+
    bool HasPhysicalAttributes() const {
        return uses_physical_attributes;
    }
@@ -415,6 +419,7 @@ private:
    bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
    bool uses_instance_id{};
    bool uses_vertex_id{};
+    bool uses_warps{};

    Tegra::Shader::Header header;
 };
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -7,6 +7,7 @@
 #include <variant>

 #include "common/common_types.h"
+#include "video_core/shader/node.h"
 #include "video_core/shader/shader_ir.h"

 namespace VideoCommon::Shader {
Author	SHA1	Message	Date
ReinUsesLisp	f564eaebed	gl_device: Enable compute shaders for Intel Mesa drivers Previously we naively checked for "Intel" in GL_VENDOR, but this includes both Intel's proprietary driver and the mesa driver. Re-enable compute shaders for mesa.	2019-12-11 00:00:30 -03:00
bunnei	34f8881d3e	Merge pull request #3201 from lioncash/dump kernel/svc: Provide implementations for svcDumpInfo/svcDumpInfoNew	2019-12-10 21:48:37 -05:00
Rodrigo Locatti	c8db7d1399	Merge pull request #3211 from FernandoS27/depth-mode Maxwell3D: Implement Depth Mode.	2019-12-10 21:20:52 -03:00
Fernando Sahmkow	7ffb672f61	Maxwell3D: Implement Depth Mode. This commit finishes adding depth mode that was reverted before due to other unresolved issues.	2019-12-10 19:51:46 -04:00
Fernando Sahmkow	6edadef96d	Merge pull request #3208 from ReinUsesLisp/vk-shader-decompiler vk_shader_decompiler: Add tessellation and misc changes	2019-12-10 08:01:41 -04:00
ReinUsesLisp	233ed96a5c	vk_shader_decompiler: Fix build issues on old gcc versions	2019-12-10 01:55:38 -03:00
ReinUsesLisp	d30cf51d7d	vk_shader_decompiler: Reduce YNegate's severity	2019-12-09 23:52:28 -03:00
ReinUsesLisp	0b5b93053d	shader_ir/other: Implement S2R InvocationId	2019-12-09 23:52:28 -03:00
ReinUsesLisp	ecbfa416f0	vk_shader_decompiler: Misc changes Update Sirit and its usage in vk_shader_decompiler. Highlights: - Implement tessellation shaders - Implement geometry shaders - Implement some missing features - Use native half float instructions when available.	2019-12-09 23:51:57 -03:00
ReinUsesLisp	9ad6327fbd	shader: Keep track of shaders using warp instructions	2019-12-09 23:40:41 -03:00
ReinUsesLisp	6233b1db08	shader_ir/memory: Implement patch stores	2019-12-09 23:25:21 -03:00
Fernando Sahmkow	f2458106e6	Merge pull request #3205 from ReinUsesLisp/vk-device vk_device: Misc changes	2019-12-09 20:02:58 -04:00
ReinUsesLisp	19ce0d4f1a	vk_device: Misc changes - Setup more features and requirements. - Improve logging for missing features. - Collect telemetry parameters. - Add queries for more image formats. - Query push constants limits. - Optionally enable some extensions.	2019-12-09 01:04:48 -03:00
bunnei	faf5ae6a50	Merge pull request #3198 from ReinUsesLisp/tessellation-maxwell maxwell_3d: Add tessellation state entries	2019-12-08 22:28:25 -05:00
bunnei	116a940dbb	Merge pull request #3204 from ReinUsesLisp/vulkan-headers externals: Update Vulkan-Headers	2019-12-08 22:28:02 -05:00
ReinUsesLisp	7ea362e134	externals: Update Vulkan-Headers	2019-12-08 22:08:19 -03:00
Rodrigo Locatti	e54699565a	Merge pull request #3199 from ReinUsesLisp/vk-swapchain vk_swapchain: Add support for swapping sRGB	2019-12-08 21:53:22 -03:00
Rodrigo Locatti	f73e569ba8	Merge pull request #3202 from lioncash/kernel-include kernel: Remove unnecessary includes	2019-12-08 01:23:58 -03:00
Lioncash	c3e43c7e81	kernel: Remove unnecessary includes Over the course of the changes to the kernel code, a few includes are no longer necessary, particularly with the change over to std::shared_ptr from Boost's intrusive_ptr.	2019-12-07 22:37:05 -05:00
Lioncash	67b8265bd6	kernel/svc: Provide implementations for svcDumpInfo/svcDumpInfoNew These are fairly trivial to implement, we can just do nothing. This also provides a spot for us to potentially dump out any relevant info in the future (e.g. for debugging purposes with homebrew, etc). While we're at it, we can also correct the names of both of these supervisor calls.	2019-12-07 22:01:17 -05:00
ReinUsesLisp	f632d00eb1	vk_swapchain: Add support for swapping sRGB We don't know until the game is running if it's using an sRGB color space or not. Add support for hot-swapping swapchain surface formats.	2019-12-06 22:42:08 -03:00
ReinUsesLisp	36651f215a	maxwell_3d: Add tessellation tess level registers	2019-12-06 22:08:22 -03:00
ReinUsesLisp	707bf41c6f	maxwell_3d: Add tessellation mode register	2019-12-06 22:07:31 -03:00
ReinUsesLisp	d2b50c5ebd	maxwell_3d: Add patch vertices register	2019-12-06 22:06:53 -03:00
bunnei	4bbb22a477	Merge pull request #3195 from FernandoS27/clear-exclusive CpuCore: Clear exclusive state after doing a run in dynarmic.	2019-12-06 20:00:23 -05:00
bunnei	d49ed4a421	Merge pull request #3197 from ReinUsesLisp/shader-char shader_bytecode: Remove corrupted character	2019-12-06 19:05:40 -05:00
ReinUsesLisp	74f515e8b6	shader_bytecode: Remove corrupted character	2019-12-06 20:31:56 -03:00
bunnei	e36814d6d5	Merge pull request #3109 from FernandoS27/new-instr Implement FLO & TXD Instructions on GPU Shaders	2019-12-06 18:18:16 -05:00
bunnei	ef2b6733d0	Merge pull request #3196 from jmerdich/fix-ea-source-build [EA source]: Ignore git-related files in cmake for early access tarballs	2019-12-06 13:19:04 -05:00
Jake Merdich	dc70a36b44	Ignore git-related files in cmake for EA tarballs This fixes the early-access builds on Windows (tested on EA 58). Cmake was previously looking for git-related files that were stripped out of the early access builds and failing; check if those exist before reading them.	2019-12-05 22:48:20 -05:00
Fernando Sahmkow	40cd4df584	CpuCore: Clear exclusive state after doing a run in dynarmic. This commit corrects an error in which a Core could remain with an exclusive state after running, leaving space for possible race conditions between changing cores.	2019-12-05 18:08:59 -04:00
bunnei	2f79cc3ef5	Merge pull request #3191 from yuzu-emu/jroweboy-patch-1 Add missing CMakeLists.txt from root folder to source upload	2019-12-03 17:38:52 -05:00
James Rowe	2883cc1658	Add missing CMakeLists.txt from root folder to source upload Fixes #3190	2019-12-03 15:35:28 -07:00
bunnei	560cfbc21a	Merge pull request #3188 from ReinUsesLisp/backend-telemetry telemetry_session: Report renderer backend	2019-12-02 15:22:51 -05:00
Fernando Sahmkow	c8473f399e	Shader_IR: Address Feedback	2019-11-18 07:34:34 -04:00
Fernando Sahmkow	cd0f5dfc17	Shader_IR: Implement TXD instruction.	2019-11-14 11:15:27 -04:00
Fernando Sahmkow	f3d1b370aa	Shader_IR: Implement FLO instruction.	2019-11-14 11:15:27 -04:00
Fernando Sahmkow	95137a04e1	Shader_Bytecode: Add encodings for FLO, SHF and TXD	2019-11-14 11:15:26 -04:00