Compare commits
38 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f564eaebed | ||
|
|
34f8881d3e | ||
|
|
c8db7d1399 | ||
|
|
7ffb672f61 | ||
|
|
6edadef96d | ||
|
|
233ed96a5c | ||
|
|
d30cf51d7d | ||
|
|
0b5b93053d | ||
|
|
ecbfa416f0 | ||
|
|
9ad6327fbd | ||
|
|
6233b1db08 | ||
|
|
f2458106e6 | ||
|
|
19ce0d4f1a | ||
|
|
faf5ae6a50 | ||
|
|
116a940dbb | ||
|
|
7ea362e134 | ||
|
|
e54699565a | ||
|
|
f73e569ba8 | ||
|
|
c3e43c7e81 | ||
|
|
67b8265bd6 | ||
|
|
f632d00eb1 | ||
|
|
36651f215a | ||
|
|
707bf41c6f | ||
|
|
d2b50c5ebd | ||
|
|
4bbb22a477 | ||
|
|
d49ed4a421 | ||
|
|
74f515e8b6 | ||
|
|
e36814d6d5 | ||
|
|
ef2b6733d0 | ||
|
|
dc70a36b44 | ||
|
|
40cd4df584 | ||
|
|
2f79cc3ef5 | ||
|
|
2883cc1658 | ||
|
|
560cfbc21a | ||
|
|
c8473f399e | ||
|
|
cd0f5dfc17 | ||
|
|
f3d1b370aa | ||
|
|
95137a04e1 |
@@ -39,6 +39,7 @@ mkdir "artifacts"
|
||||
# Build a tar.xz for the source of the release
|
||||
Copy-Item .\license.txt -Destination $MSVC_SOURCE
|
||||
Copy-Item .\README.md -Destination $MSVC_SOURCE
|
||||
Copy-Item .\CMakeLists.txt -Destination $MSVC_SOURCE
|
||||
Copy-Item .\src -Recurse -Destination $MSVC_SOURCE
|
||||
Copy-Item .\externals -Recurse -Destination $MSVC_SOURCE
|
||||
Copy-Item .\dist -Recurse -Destination $MSVC_SOURCE
|
||||
@@ -60,4 +61,4 @@ Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Ite
|
||||
|
||||
Get-ChildItem . -Filter "*.zip" | Copy-Item -destination "artifacts"
|
||||
Get-ChildItem . -Filter "*.7z" | Copy-Item -destination "artifacts"
|
||||
Get-ChildItem . -Filter "*.tar.xz" | Copy-Item -destination "artifacts"
|
||||
Get-ChildItem . -Filter "*.tar.xz" | Copy-Item -destination "artifacts"
|
||||
|
||||
@@ -29,7 +29,7 @@ option(ENABLE_VULKAN "Enables Vulkan backend" ON)
|
||||
|
||||
option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)
|
||||
|
||||
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
|
||||
if(EXISTS ${PROJECT_SOURCE_DIR}/hooks/pre-commit AND NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
|
||||
message(STATUS "Copying pre-commit hook")
|
||||
file(COPY hooks/pre-commit
|
||||
DESTINATION ${PROJECT_SOURCE_DIR}/.git/hooks)
|
||||
@@ -49,7 +49,10 @@ function(check_submodules_present)
|
||||
endif()
|
||||
endforeach()
|
||||
endfunction()
|
||||
check_submodules_present()
|
||||
|
||||
if(EXISTS ${PROJECT_SOURCE_DIR}/.gitmodules)
|
||||
check_submodules_present()
|
||||
endif()
|
||||
|
||||
configure_file(${PROJECT_SOURCE_DIR}/dist/compatibility_list/compatibility_list.qrc
|
||||
${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc
|
||||
|
||||
2
externals/Vulkan-Headers
vendored
2
externals/Vulkan-Headers
vendored
Submodule externals/Vulkan-Headers updated: fd568d51ed...d42d0747ee
2
externals/sirit
vendored
2
externals/sirit
vendored
Submodule externals/sirit updated: f7c4b07a7e...e1a6729df7
@@ -96,6 +96,8 @@ void Cpu::RunLoop(bool tight_loop) {
|
||||
} else {
|
||||
arm_interface->Step();
|
||||
}
|
||||
// We are stopping a run, exclusive state must be cleared
|
||||
arm_interface->ClearExclusiveState();
|
||||
}
|
||||
core_timing.Advance();
|
||||
|
||||
|
||||
@@ -11,8 +11,6 @@
|
||||
#include "core/core_cpu.h"
|
||||
#include "core/hle/kernel/address_arbiter.h"
|
||||
#include "core/hle/kernel/errors.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/kernel/process.h"
|
||||
#include "core/hle/kernel/scheduler.h"
|
||||
#include "core/hle/kernel/thread.h"
|
||||
#include "core/hle/result.h"
|
||||
|
||||
@@ -4,10 +4,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
|
||||
union ResultCode;
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
#include "core/hle/kernel/hle_ipc.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/kernel/server_port.h"
|
||||
#include "core/hle/kernel/server_session.h"
|
||||
#include "core/hle/kernel/session.h"
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
@@ -4,7 +4,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/result.h"
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/result.h"
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/core_timing_util.h"
|
||||
#include "core/hle/kernel/address_arbiter.h"
|
||||
#include "core/hle/kernel/client_port.h"
|
||||
#include "core/hle/kernel/errors.h"
|
||||
#include "core/hle/kernel/handle_table.h"
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
|
||||
|
||||
@@ -458,7 +458,6 @@ void Scheduler::SwitchContext() {
|
||||
cpu_core.LoadContext(new_thread->GetContext());
|
||||
cpu_core.SetTlsAddress(new_thread->GetTLSAddress());
|
||||
cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
|
||||
cpu_core.ClearExclusiveState();
|
||||
} else {
|
||||
current_thread = nullptr;
|
||||
// Note: We do not reset the current process and current page table when idling because
|
||||
|
||||
@@ -4,11 +4,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/multi_level_queue.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/kernel/thread.h"
|
||||
|
||||
namespace Core {
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "core/hle/kernel/wait_object.h"
|
||||
#include "core/hle/result.h"
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
|
||||
@@ -1781,6 +1781,17 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type,
|
||||
return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
|
||||
}
|
||||
|
||||
static void KernelDebug([[maybe_unused]] Core::System& system,
|
||||
[[maybe_unused]] u32 kernel_debug_type, [[maybe_unused]] u64 param1,
|
||||
[[maybe_unused]] u64 param2, [[maybe_unused]] u64 param3) {
|
||||
// Intentionally do nothing, as this does nothing in released kernel binaries.
|
||||
}
|
||||
|
||||
static void ChangeKernelTraceState([[maybe_unused]] Core::System& system,
|
||||
[[maybe_unused]] u32 trace_state) {
|
||||
// Intentionally do nothing, as this does nothing in released kernel binaries.
|
||||
}
|
||||
|
||||
/// This returns the total CPU ticks elapsed since the CPU was powered-on
|
||||
static u64 GetSystemTick(Core::System& system) {
|
||||
LOG_TRACE(Kernel_SVC, "called");
|
||||
@@ -2418,8 +2429,8 @@ static const FunctionDef SVC_Table[] = {
|
||||
{0x39, nullptr, "Unknown"},
|
||||
{0x3A, nullptr, "Unknown"},
|
||||
{0x3B, nullptr, "Unknown"},
|
||||
{0x3C, nullptr, "DumpInfo"},
|
||||
{0x3D, nullptr, "DumpInfoNew"},
|
||||
{0x3C, SvcWrap<KernelDebug>, "KernelDebug"},
|
||||
{0x3D, SvcWrap<ChangeKernelTraceState>, "ChangeKernelTraceState"},
|
||||
{0x3E, nullptr, "Unknown"},
|
||||
{0x3F, nullptr, "Unknown"},
|
||||
{0x40, nullptr, "CreateSession"},
|
||||
|
||||
@@ -311,6 +311,17 @@ void SvcWrap(Core::System& system) {
|
||||
func(system);
|
||||
}
|
||||
|
||||
template <void func(Core::System&, u32)>
|
||||
void SvcWrap(Core::System& system) {
|
||||
func(system, static_cast<u32>(Param(system, 0)));
|
||||
}
|
||||
|
||||
template <void func(Core::System&, u32, u64, u64, u64)>
|
||||
void SvcWrap(Core::System& system) {
|
||||
func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2),
|
||||
Param(system, 3));
|
||||
}
|
||||
|
||||
template <void func(Core::System&, s64)>
|
||||
void SvcWrap(Core::System& system) {
|
||||
func(system, static_cast<s64>(Param(system, 0)));
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/kernel/physical_memory.h"
|
||||
|
||||
@@ -4,8 +4,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <boost/smart_ptr/intrusive_ptr.hpp>
|
||||
|
||||
#include "core/hle/kernel/object.h"
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "core/hle/kernel/object.h"
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
@@ -310,6 +310,11 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
enum class DepthMode : u32 {
|
||||
MinusOneToOne = 0,
|
||||
ZeroToOne = 1,
|
||||
};
|
||||
|
||||
enum class PrimitiveTopology : u32 {
|
||||
Points = 0x0,
|
||||
Lines = 0x1,
|
||||
@@ -491,6 +496,18 @@ public:
|
||||
INSERT_UNION_PADDING_WORDS(1);
|
||||
};
|
||||
|
||||
enum class TessellationPrimitive : u32 {
|
||||
Isolines = 0,
|
||||
Triangles = 1,
|
||||
Quads = 2,
|
||||
};
|
||||
|
||||
enum class TessellationSpacing : u32 {
|
||||
Equal = 0,
|
||||
FractionalOdd = 1,
|
||||
FractionalEven = 2,
|
||||
};
|
||||
|
||||
struct RenderTargetConfig {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
@@ -628,7 +645,19 @@ public:
|
||||
};
|
||||
} sync_info;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x11E);
|
||||
INSERT_UNION_PADDING_WORDS(0x15);
|
||||
|
||||
union {
|
||||
BitField<0, 2, TessellationPrimitive> prim;
|
||||
BitField<4, 2, TessellationSpacing> spacing;
|
||||
BitField<8, 1, u32> cw;
|
||||
BitField<9, 1, u32> connected;
|
||||
} tess_mode;
|
||||
|
||||
std::array<f32, 4> tess_level_outer;
|
||||
std::array<f32, 2> tess_level_inner;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x102);
|
||||
|
||||
u32 tfb_enabled;
|
||||
|
||||
@@ -647,7 +676,7 @@ public:
|
||||
u32 count;
|
||||
} vertex_buffer;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(1);
|
||||
DepthMode depth_mode;
|
||||
|
||||
float clear_color[4];
|
||||
float clear_depth;
|
||||
@@ -662,7 +691,9 @@ public:
|
||||
u32 polygon_offset_line_enable;
|
||||
u32 polygon_offset_fill_enable;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0xD);
|
||||
u32 patch_vertices;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0xC);
|
||||
|
||||
std::array<ScissorTest, NumViewports> scissor_test;
|
||||
|
||||
@@ -1386,17 +1417,22 @@ ASSERT_REG_POSITION(upload, 0x60);
|
||||
ASSERT_REG_POSITION(exec_upload, 0x6C);
|
||||
ASSERT_REG_POSITION(data_upload, 0x6D);
|
||||
ASSERT_REG_POSITION(sync_info, 0xB2);
|
||||
ASSERT_REG_POSITION(tess_mode, 0xC8);
|
||||
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
|
||||
ASSERT_REG_POSITION(tess_level_inner, 0xCD);
|
||||
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
|
||||
ASSERT_REG_POSITION(rt, 0x200);
|
||||
ASSERT_REG_POSITION(viewport_transform, 0x280);
|
||||
ASSERT_REG_POSITION(viewports, 0x300);
|
||||
ASSERT_REG_POSITION(vertex_buffer, 0x35D);
|
||||
ASSERT_REG_POSITION(depth_mode, 0x35F);
|
||||
ASSERT_REG_POSITION(clear_color[0], 0x360);
|
||||
ASSERT_REG_POSITION(clear_depth, 0x364);
|
||||
ASSERT_REG_POSITION(clear_stencil, 0x368);
|
||||
ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
|
||||
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
|
||||
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
|
||||
ASSERT_REG_POSITION(patch_vertices, 0x373);
|
||||
ASSERT_REG_POSITION(scissor_test, 0x380);
|
||||
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
|
||||
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6);
|
||||
|
||||
@@ -98,10 +98,11 @@ union Attribute {
|
||||
BitField<20, 10, u64> immediate;
|
||||
BitField<22, 2, u64> element;
|
||||
BitField<24, 6, Index> index;
|
||||
BitField<31, 1, u64> patch;
|
||||
BitField<47, 3, AttributeSize> size;
|
||||
|
||||
bool IsPhysical() const {
|
||||
return element == 0 && static_cast<u64>(index.Value()) == 0;
|
||||
return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
|
||||
}
|
||||
} fmt20;
|
||||
|
||||
@@ -799,6 +800,12 @@ union Instruction {
|
||||
BitField<40, 1, u64> invert;
|
||||
} popc;
|
||||
|
||||
union {
|
||||
BitField<41, 1, u64> sh;
|
||||
BitField<40, 1, u64> invert;
|
||||
BitField<48, 1, u64> is_signed;
|
||||
} flo;
|
||||
|
||||
union {
|
||||
BitField<39, 3, u64> pred;
|
||||
BitField<42, 1, u64> neg_pred;
|
||||
@@ -1439,6 +1446,26 @@ union Instruction {
|
||||
}
|
||||
} tlds;
|
||||
|
||||
union {
|
||||
BitField<28, 1, u64> is_array;
|
||||
BitField<29, 2, TextureType> texture_type;
|
||||
BitField<35, 1, u64> aoffi_flag;
|
||||
BitField<49, 1, u64> nodep_flag;
|
||||
|
||||
bool UsesMiscMode(TextureMiscMode mode) const {
|
||||
switch (mode) {
|
||||
case TextureMiscMode::AOFFI:
|
||||
return aoffi_flag != 0;
|
||||
case TextureMiscMode::NODEP:
|
||||
return nodep_flag != 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} txd;
|
||||
|
||||
union {
|
||||
BitField<24, 2, StoreCacheManagement> cache_management;
|
||||
BitField<33, 3, ImageType> image_type;
|
||||
@@ -1632,6 +1659,8 @@ public:
|
||||
TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
|
||||
TMML_B, // Texture Mip Map Level
|
||||
TMML, // Texture Mip Map Level
|
||||
TXD, // Texture Gradient/Load with Derivates
|
||||
TXD_B, // Texture Gradient/Load with Derivates Bindless
|
||||
SUST, // Surface Store
|
||||
SULD, // Surface Load
|
||||
SUATOM, // Surface Atomic Operation
|
||||
@@ -1664,6 +1693,9 @@ public:
|
||||
ISCADD_C, // Scale and Add
|
||||
ISCADD_R,
|
||||
ISCADD_IMM,
|
||||
FLO_R,
|
||||
FLO_C,
|
||||
FLO_IMM,
|
||||
LEA_R1,
|
||||
LEA_R2,
|
||||
LEA_RZ,
|
||||
@@ -1727,6 +1759,10 @@ public:
|
||||
SHR_C,
|
||||
SHR_R,
|
||||
SHR_IMM,
|
||||
SHF_RIGHT_R,
|
||||
SHF_RIGHT_IMM,
|
||||
SHF_LEFT_R,
|
||||
SHF_LEFT_IMM,
|
||||
FMNMX_C,
|
||||
FMNMX_R,
|
||||
FMNMX_IMM,
|
||||
@@ -1924,6 +1960,8 @@ private:
|
||||
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
|
||||
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
|
||||
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
|
||||
INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
|
||||
INST("11011110001110--", Id::TXD, Type::Texture, "TXD"),
|
||||
INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
|
||||
INST("11101011000-----", Id::SULD, Type::Image, "SULD"),
|
||||
INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
|
||||
@@ -1965,6 +2003,9 @@ private:
|
||||
INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"),
|
||||
INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"),
|
||||
INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"),
|
||||
INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"),
|
||||
INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"),
|
||||
INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"),
|
||||
INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
|
||||
INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
|
||||
INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
|
||||
@@ -2022,6 +2063,10 @@ private:
|
||||
INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
|
||||
INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
|
||||
INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
|
||||
INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"),
|
||||
INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"),
|
||||
INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"),
|
||||
INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
|
||||
INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
|
||||
INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
|
||||
INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
@@ -134,11 +135,13 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
|
||||
|
||||
Device::Device() : base_bindings{BuildBaseBindings()} {
|
||||
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
|
||||
const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
|
||||
const std::vector extensions = GetExtensions();
|
||||
|
||||
const bool is_nvidia = vendor == "NVIDIA Corporation";
|
||||
const bool is_amd = vendor == "ATI Technologies Inc.";
|
||||
const bool is_intel = vendor == "Intel";
|
||||
const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
|
||||
|
||||
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
@@ -152,7 +155,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
|
||||
has_variable_aoffi = TestVariableAoffi();
|
||||
has_component_indexing_bug = is_amd;
|
||||
has_precise_bug = TestPreciseBug();
|
||||
has_broken_compute = is_intel;
|
||||
has_broken_compute = is_intel_proprietary;
|
||||
has_fast_buffer_sub_data = is_nvidia;
|
||||
|
||||
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
|
||||
|
||||
@@ -1028,6 +1028,10 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
|
||||
flip_y = !flip_y;
|
||||
}
|
||||
state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
|
||||
state.clip_control.depth_mode =
|
||||
regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne
|
||||
? GL_ZERO_TO_ONE
|
||||
: GL_NEGATIVE_ONE_TO_ONE;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncClipEnabled(
|
||||
|
||||
@@ -49,8 +49,9 @@ class ExprDecompiler;
|
||||
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
|
||||
|
||||
struct TextureAoffi {};
|
||||
struct TextureDerivates {};
|
||||
using TextureArgument = std::pair<Type, Node>;
|
||||
using TextureIR = std::variant<TextureAoffi, TextureArgument>;
|
||||
using TextureIR = std::variant<TextureAoffi, TextureDerivates, TextureArgument>;
|
||||
|
||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
|
||||
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
|
||||
@@ -1112,6 +1113,8 @@ private:
|
||||
expr += GenerateTextureArgument(*argument);
|
||||
} else if (std::holds_alternative<TextureAoffi>(variant)) {
|
||||
expr += GenerateTextureAoffi(meta->aoffi);
|
||||
} else if (std::holds_alternative<TextureDerivates>(variant)) {
|
||||
expr += GenerateTextureDerivates(meta->derivates);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
@@ -1181,6 +1184,36 @@ private:
|
||||
return expr;
|
||||
}
|
||||
|
||||
std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
|
||||
if (derivates.empty()) {
|
||||
return {};
|
||||
}
|
||||
constexpr std::array coord_constructors = {"float", "vec2", "vec3"};
|
||||
std::string expr = ", ";
|
||||
const std::size_t components = derivates.size() / 2;
|
||||
std::string dx = coord_constructors.at(components - 1);
|
||||
std::string dy = coord_constructors.at(components - 1);
|
||||
dx += '(';
|
||||
dy += '(';
|
||||
|
||||
for (std::size_t index = 0; index < components; ++index) {
|
||||
const auto operand_x{derivates.at(index * 2)};
|
||||
const auto operand_y{derivates.at(index * 2 + 1)};
|
||||
dx += Visit(operand_x).AsFloat();
|
||||
dy += Visit(operand_y).AsFloat();
|
||||
|
||||
if (index + 1 < components) {
|
||||
dx += ", ";
|
||||
dy += ", ";
|
||||
}
|
||||
}
|
||||
dx += ')';
|
||||
dy += ')';
|
||||
expr += dx + ", " + dy;
|
||||
|
||||
return expr;
|
||||
}
|
||||
|
||||
std::string BuildIntegerCoordinates(Operation operation) {
|
||||
constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
|
||||
const std::size_t coords_count{operation.GetOperandsCount()};
|
||||
@@ -1450,6 +1483,11 @@ private:
|
||||
return GenerateUnary(operation, "bitCount", type, type);
|
||||
}
|
||||
|
||||
template <Type type>
|
||||
Expression BitMSB(Operation operation) {
|
||||
return GenerateUnary(operation, "findMSB", type, type);
|
||||
}
|
||||
|
||||
Expression HNegate(Operation operation) {
|
||||
const auto GetNegate = [&](std::size_t index) {
|
||||
return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
|
||||
@@ -1738,6 +1776,14 @@ private:
|
||||
return {std::move(expr), Type::Float};
|
||||
}
|
||||
|
||||
Expression TextureGradient(Operation operation) {
|
||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||
ASSERT(meta);
|
||||
|
||||
std::string expr = GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureAoffi{}});
|
||||
return {std::move(expr) + GetSwizzle(meta->element), Type::Float};
|
||||
}
|
||||
|
||||
Expression ImageLoad(Operation operation) {
|
||||
if (!device.HasImageLoadFormatted()) {
|
||||
LOG_ERROR(Render_OpenGL,
|
||||
@@ -1869,6 +1915,10 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression InvocationId(Operation operation) {
|
||||
return {"gl_InvocationID", Type::Int};
|
||||
}
|
||||
|
||||
Expression YNegate(Operation operation) {
|
||||
return {"y_direction", Type::Float};
|
||||
}
|
||||
@@ -2003,6 +2053,7 @@ private:
|
||||
&GLSLDecompiler::BitfieldInsert<Type::Int>,
|
||||
&GLSLDecompiler::BitfieldExtract<Type::Int>,
|
||||
&GLSLDecompiler::BitCount<Type::Int>,
|
||||
&GLSLDecompiler::BitMSB<Type::Int>,
|
||||
|
||||
&GLSLDecompiler::Add<Type::Uint>,
|
||||
&GLSLDecompiler::Mul<Type::Uint>,
|
||||
@@ -2021,6 +2072,7 @@ private:
|
||||
&GLSLDecompiler::BitfieldInsert<Type::Uint>,
|
||||
&GLSLDecompiler::BitfieldExtract<Type::Uint>,
|
||||
&GLSLDecompiler::BitCount<Type::Uint>,
|
||||
&GLSLDecompiler::BitMSB<Type::Uint>,
|
||||
|
||||
&GLSLDecompiler::Add<Type::HalfFloat>,
|
||||
&GLSLDecompiler::Mul<Type::HalfFloat>,
|
||||
@@ -2084,6 +2136,7 @@ private:
|
||||
&GLSLDecompiler::TextureQueryDimensions,
|
||||
&GLSLDecompiler::TextureQueryLod,
|
||||
&GLSLDecompiler::TexelFetch,
|
||||
&GLSLDecompiler::TextureGradient,
|
||||
|
||||
&GLSLDecompiler::ImageLoad,
|
||||
&GLSLDecompiler::ImageStore,
|
||||
@@ -2104,6 +2157,7 @@ private:
|
||||
&GLSLDecompiler::EmitVertex,
|
||||
&GLSLDecompiler::EndPrimitive,
|
||||
|
||||
&GLSLDecompiler::InvocationId,
|
||||
&GLSLDecompiler::YNegate,
|
||||
&GLSLDecompiler::LocalInvocationId<0>,
|
||||
&GLSLDecompiler::LocalInvocationId<1>,
|
||||
|
||||
@@ -411,8 +411,9 @@ void OpenGLState::ApplyAlphaTest() {
|
||||
}
|
||||
|
||||
void OpenGLState::ApplyClipControl() {
|
||||
if (UpdateValue(cur_state.clip_control.origin, clip_control.origin)) {
|
||||
glClipControl(clip_control.origin, GL_NEGATIVE_ONE_TO_ONE);
|
||||
if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode),
|
||||
std::tie(clip_control.origin, clip_control.depth_mode))) {
|
||||
glClipControl(clip_control.origin, clip_control.depth_mode);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -150,6 +150,7 @@ public:
|
||||
|
||||
struct {
|
||||
GLenum origin = GL_LOWER_LEFT;
|
||||
GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE;
|
||||
} clip_control;
|
||||
|
||||
OpenGLState();
|
||||
|
||||
@@ -4,6 +4,17 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace vk {
|
||||
class DispatchLoaderDynamic;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
constexpr vk::DispatchLoaderDynamic* dont_use_me_dld = nullptr;
|
||||
}
|
||||
|
||||
#define VULKAN_HPP_DEFAULT_DISPATCHER (*::Vulkan::dont_use_me_dld)
|
||||
#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
#include <vulkan/vulkan.hpp>
|
||||
|
||||
namespace Vulkan {
|
||||
@@ -41,5 +52,7 @@ using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
|
||||
using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
|
||||
using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
|
||||
using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
|
||||
using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>;
|
||||
using UniqueDebugUtilsMessengerEXT = UniqueHandle<vk::DebugUtilsMessengerEXT>;
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <bitset>
|
||||
#include <cstdlib>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <string_view>
|
||||
@@ -15,6 +16,15 @@ namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
namespace Alternatives {
|
||||
|
||||
constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
|
||||
vk::Format::eD16UnormS8Uint, vk::Format{}};
|
||||
constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
|
||||
vk::Format::eD32SfloatS8Uint, vk::Format{}};
|
||||
|
||||
} // namespace Alternatives
|
||||
|
||||
template <typename T>
|
||||
void SetNext(void**& next, T& data) {
|
||||
*next = &data;
|
||||
@@ -22,7 +32,7 @@ void SetNext(void**& next, T& data) {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) {
|
||||
T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
|
||||
vk::PhysicalDeviceFeatures2 features;
|
||||
T extension_features;
|
||||
features.pNext = &extension_features;
|
||||
@@ -30,17 +40,14 @@ T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) {
|
||||
return extension_features;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
namespace Alternatives {
|
||||
|
||||
constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
|
||||
vk::Format::eD16UnormS8Uint, vk::Format{}};
|
||||
constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
|
||||
vk::Format::eD32SfloatS8Uint, vk::Format{}};
|
||||
constexpr std::array Astc = {vk::Format::eA8B8G8R8UnormPack32, vk::Format{}};
|
||||
|
||||
} // namespace Alternatives
|
||||
template <typename T>
|
||||
T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
|
||||
vk::PhysicalDeviceProperties2 properties;
|
||||
T extension_properties;
|
||||
properties.pNext = &extension_properties;
|
||||
physical.getProperties2(&properties, dldi);
|
||||
return extension_properties;
|
||||
}
|
||||
|
||||
constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
|
||||
switch (format) {
|
||||
@@ -53,8 +60,7 @@ constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
|
||||
}
|
||||
}
|
||||
|
||||
constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties,
|
||||
FormatType format_type) {
|
||||
vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, FormatType format_type) {
|
||||
switch (format_type) {
|
||||
case FormatType::Linear:
|
||||
return properties.linearTilingFeatures;
|
||||
@@ -67,11 +73,13 @@ constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properti
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
|
||||
vk::SurfaceKHR surface)
|
||||
: physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
|
||||
: physical{physical}, properties{physical.getProperties(dldi)},
|
||||
format_properties{GetFormatProperties(dldi, physical)} {
|
||||
SetupFamilies(dldi, surface);
|
||||
SetupProperties(dldi);
|
||||
SetupFeatures(dldi);
|
||||
}
|
||||
|
||||
@@ -89,12 +97,22 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
|
||||
features.depthClamp = true;
|
||||
features.samplerAnisotropy = true;
|
||||
features.largePoints = true;
|
||||
features.multiViewport = true;
|
||||
features.depthBiasClamp = true;
|
||||
features.geometryShader = true;
|
||||
features.tessellationShader = true;
|
||||
features.fragmentStoresAndAtomics = true;
|
||||
features.shaderImageGatherExtended = true;
|
||||
features.shaderStorageImageWriteWithoutFormat = true;
|
||||
features.textureCompressionASTC_LDR = is_optimal_astc_supported;
|
||||
|
||||
vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor;
|
||||
vertex_divisor.vertexAttributeInstanceRateDivisor = true;
|
||||
vertex_divisor.vertexAttributeInstanceRateZeroDivisor = true;
|
||||
SetNext(next, vertex_divisor);
|
||||
vk::PhysicalDevice16BitStorageFeaturesKHR bit16_storage;
|
||||
bit16_storage.uniformAndStorageBuffer16BitAccess = true;
|
||||
SetNext(next, bit16_storage);
|
||||
|
||||
vk::PhysicalDevice8BitStorageFeaturesKHR bit8_storage;
|
||||
bit8_storage.uniformAndStorageBuffer8BitAccess = true;
|
||||
SetNext(next, bit8_storage);
|
||||
|
||||
vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
|
||||
if (is_float16_supported) {
|
||||
@@ -120,6 +138,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
|
||||
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
|
||||
}
|
||||
|
||||
if (!ext_depth_range_unrestricted) {
|
||||
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
|
||||
}
|
||||
|
||||
vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0,
|
||||
nullptr, static_cast<u32>(extensions.size()), extensions.data(),
|
||||
nullptr);
|
||||
@@ -135,16 +157,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
|
||||
logical = UniqueDevice(
|
||||
dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
|
||||
|
||||
if (khr_driver_properties) {
|
||||
vk::PhysicalDeviceDriverPropertiesKHR driver;
|
||||
vk::PhysicalDeviceProperties2 properties;
|
||||
properties.pNext = &driver;
|
||||
physical.getProperties2(&properties, dld);
|
||||
driver_id = driver.driverID;
|
||||
LOG_INFO(Render_Vulkan, "Driver: {} {}", driver.driverName, driver.driverInfo);
|
||||
} else {
|
||||
LOG_INFO(Render_Vulkan, "Driver: Unknown");
|
||||
}
|
||||
CollectTelemetryParameters();
|
||||
|
||||
graphics_queue = logical->getQueue(graphics_family, 0, dld);
|
||||
present_queue = logical->getQueue(present_family, 0, dld);
|
||||
@@ -190,6 +203,18 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
|
||||
|
||||
bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
|
||||
const vk::DispatchLoaderDynamic& dldi) const {
|
||||
// Disable for now to avoid converting ASTC twice.
|
||||
return false;
|
||||
static constexpr std::array astc_formats = {
|
||||
vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc8x8SrgbBlock,
|
||||
vk::Format::eAstc8x5SrgbBlock, vk::Format::eAstc5x4SrgbBlock,
|
||||
vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock,
|
||||
vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock,
|
||||
vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock,
|
||||
vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock,
|
||||
vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock,
|
||||
vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock,
|
||||
vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock};
|
||||
if (!features.textureCompressionASTC_LDR) {
|
||||
return false;
|
||||
}
|
||||
@@ -197,12 +222,6 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
|
||||
vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc |
|
||||
vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
|
||||
vk::FormatFeatureFlagBits::eTransferDst};
|
||||
constexpr std::array astc_formats = {
|
||||
vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
|
||||
vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock,
|
||||
vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock,
|
||||
vk::Format::eAstc5x5SrgbBlock, vk::Format::eAstc10x8UnormBlock,
|
||||
vk::Format::eAstc10x8SrgbBlock};
|
||||
for (const auto format : astc_formats) {
|
||||
const auto format_properties{physical.getFormatProperties(format, dldi)};
|
||||
if (!(format_properties.optimalTilingFeatures & format_feature_usage)) {
|
||||
@@ -225,11 +244,17 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
|
||||
|
||||
bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
|
||||
vk::SurfaceKHR surface) {
|
||||
LOG_INFO(Render_Vulkan, "{}", physical.getProperties(dldi).deviceName);
|
||||
bool is_suitable = true;
|
||||
|
||||
constexpr std::array required_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME,
|
||||
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME};
|
||||
constexpr std::array required_extensions = {
|
||||
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
|
||||
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
|
||||
VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
|
||||
VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
|
||||
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
|
||||
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
|
||||
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
|
||||
};
|
||||
std::bitset<required_extensions.size()> available_extensions{};
|
||||
|
||||
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
|
||||
@@ -246,7 +271,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
|
||||
if (available_extensions[i]) {
|
||||
continue;
|
||||
}
|
||||
LOG_INFO(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
|
||||
LOG_ERROR(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
|
||||
is_suitable = false;
|
||||
}
|
||||
}
|
||||
@@ -263,7 +288,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
|
||||
has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
|
||||
}
|
||||
if (!has_graphics || !has_present) {
|
||||
LOG_INFO(Render_Vulkan, "Device lacks a graphics and present queue");
|
||||
LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
|
||||
is_suitable = false;
|
||||
}
|
||||
|
||||
@@ -273,8 +298,15 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
|
||||
|
||||
constexpr u32 required_ubo_size = 65536;
|
||||
if (limits.maxUniformBufferRange < required_ubo_size) {
|
||||
LOG_INFO(Render_Vulkan, "Device UBO size {} is too small, {} is required)",
|
||||
limits.maxUniformBufferRange, required_ubo_size);
|
||||
LOG_ERROR(Render_Vulkan, "Device UBO size {} is too small, {} is required",
|
||||
limits.maxUniformBufferRange, required_ubo_size);
|
||||
is_suitable = false;
|
||||
}
|
||||
|
||||
constexpr u32 required_num_viewports = 16;
|
||||
if (limits.maxViewports < required_num_viewports) {
|
||||
LOG_INFO(Render_Vulkan, "Device number of viewports {} is too small, {} is required",
|
||||
limits.maxViewports, required_num_viewports);
|
||||
is_suitable = false;
|
||||
}
|
||||
|
||||
@@ -285,24 +317,32 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
|
||||
std::make_pair(features.depthClamp, "depthClamp"),
|
||||
std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
|
||||
std::make_pair(features.largePoints, "largePoints"),
|
||||
std::make_pair(features.multiViewport, "multiViewport"),
|
||||
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
|
||||
std::make_pair(features.geometryShader, "geometryShader"),
|
||||
std::make_pair(features.tessellationShader, "tessellationShader"),
|
||||
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
|
||||
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
|
||||
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
|
||||
"shaderStorageImageWriteWithoutFormat"),
|
||||
};
|
||||
for (const auto& [supported, name] : feature_report) {
|
||||
if (supported) {
|
||||
continue;
|
||||
}
|
||||
LOG_INFO(Render_Vulkan, "Missing required feature: {}", name);
|
||||
LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name);
|
||||
is_suitable = false;
|
||||
}
|
||||
|
||||
if (!is_suitable) {
|
||||
LOG_ERROR(Render_Vulkan, "{} is not suitable", properties.deviceName);
|
||||
}
|
||||
|
||||
return is_suitable;
|
||||
}
|
||||
|
||||
std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
|
||||
std::vector<const char*> extensions;
|
||||
extensions.reserve(7);
|
||||
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
|
||||
|
||||
const auto Test = [&](const vk::ExtensionProperties& extension,
|
||||
std::optional<std::reference_wrapper<bool>> status, const char* name,
|
||||
bool push) {
|
||||
@@ -317,13 +357,30 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
|
||||
}
|
||||
};
|
||||
|
||||
extensions.reserve(13);
|
||||
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
|
||||
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
|
||||
extensions.push_back(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME);
|
||||
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
|
||||
extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
|
||||
extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
|
||||
|
||||
[[maybe_unused]] const bool nsight =
|
||||
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
|
||||
bool khr_shader_float16_int8{};
|
||||
bool ext_subgroup_size_control{};
|
||||
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
|
||||
Test(extension, khr_uniform_buffer_standard_layout,
|
||||
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
|
||||
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
|
||||
Test(extension, khr_driver_properties, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, true);
|
||||
Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
|
||||
Test(extension, ext_depth_range_unrestricted,
|
||||
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
|
||||
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
|
||||
Test(extension, ext_shader_viewport_index_layer,
|
||||
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
|
||||
Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
|
||||
false);
|
||||
}
|
||||
|
||||
if (khr_shader_float16_int8) {
|
||||
@@ -332,6 +389,23 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
|
||||
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
if (ext_subgroup_size_control) {
|
||||
const auto features =
|
||||
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
|
||||
const auto properties =
|
||||
GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi);
|
||||
|
||||
is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize;
|
||||
|
||||
if (features.subgroupSizeControl && properties.minSubgroupSize <= GuestWarpSize &&
|
||||
properties.maxSubgroupSize >= GuestWarpSize) {
|
||||
extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
|
||||
guest_warp_stages = properties.requiredSubgroupSizeStages;
|
||||
}
|
||||
} else {
|
||||
is_warp_potentially_bigger = true;
|
||||
}
|
||||
|
||||
return extensions;
|
||||
}
|
||||
|
||||
@@ -358,19 +432,23 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
|
||||
present_family = *present_family_;
|
||||
}
|
||||
|
||||
void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
|
||||
const auto props = physical.getProperties(dldi);
|
||||
device_type = props.deviceType;
|
||||
uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
|
||||
storage_buffer_alignment = static_cast<u64>(props.limits.minStorageBufferOffsetAlignment);
|
||||
max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange);
|
||||
}
|
||||
|
||||
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
|
||||
const auto supported_features{physical.getFeatures(dldi)};
|
||||
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
|
||||
}
|
||||
|
||||
void VKDevice::CollectTelemetryParameters() {
|
||||
const auto driver = GetProperties<vk::PhysicalDeviceDriverPropertiesKHR>(physical, dld);
|
||||
driver_id = driver.driverID;
|
||||
vendor_name = driver.driverName;
|
||||
|
||||
const auto extensions = physical.enumerateDeviceExtensionProperties(nullptr, dld);
|
||||
reported_extensions.reserve(std::size(extensions));
|
||||
for (const auto& extension : extensions) {
|
||||
reported_extensions.push_back(extension.extensionName);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
|
||||
static const float QUEUE_PRIORITY = 1.0f;
|
||||
|
||||
@@ -385,50 +463,70 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
|
||||
|
||||
std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
|
||||
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
|
||||
constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
|
||||
vk::Format::eA8B8G8R8SnormPack32,
|
||||
vk::Format::eA8B8G8R8SrgbPack32,
|
||||
vk::Format::eB5G6R5UnormPack16,
|
||||
vk::Format::eA2B10G10R10UnormPack32,
|
||||
vk::Format::eR32G32B32A32Sfloat,
|
||||
vk::Format::eR16G16B16A16Uint,
|
||||
vk::Format::eR16G16Unorm,
|
||||
vk::Format::eR16G16Snorm,
|
||||
vk::Format::eR16G16Sfloat,
|
||||
vk::Format::eR16Unorm,
|
||||
vk::Format::eR8G8B8A8Srgb,
|
||||
vk::Format::eR8G8Unorm,
|
||||
vk::Format::eR8G8Snorm,
|
||||
vk::Format::eR8Unorm,
|
||||
vk::Format::eB10G11R11UfloatPack32,
|
||||
vk::Format::eR32Sfloat,
|
||||
vk::Format::eR16Sfloat,
|
||||
vk::Format::eR16G16B16A16Sfloat,
|
||||
vk::Format::eB8G8R8A8Unorm,
|
||||
vk::Format::eD32Sfloat,
|
||||
vk::Format::eD16Unorm,
|
||||
vk::Format::eD16UnormS8Uint,
|
||||
vk::Format::eD24UnormS8Uint,
|
||||
vk::Format::eD32SfloatS8Uint,
|
||||
vk::Format::eBc1RgbaUnormBlock,
|
||||
vk::Format::eBc2UnormBlock,
|
||||
vk::Format::eBc3UnormBlock,
|
||||
vk::Format::eBc4UnormBlock,
|
||||
vk::Format::eBc5UnormBlock,
|
||||
vk::Format::eBc5SnormBlock,
|
||||
vk::Format::eBc7UnormBlock,
|
||||
vk::Format::eBc1RgbaSrgbBlock,
|
||||
vk::Format::eBc3SrgbBlock,
|
||||
vk::Format::eBc7SrgbBlock,
|
||||
vk::Format::eAstc4x4UnormBlock,
|
||||
vk::Format::eAstc4x4SrgbBlock,
|
||||
vk::Format::eAstc8x8SrgbBlock,
|
||||
vk::Format::eAstc8x6SrgbBlock,
|
||||
vk::Format::eAstc5x4SrgbBlock,
|
||||
vk::Format::eAstc5x5UnormBlock,
|
||||
vk::Format::eAstc5x5SrgbBlock,
|
||||
vk::Format::eAstc10x8UnormBlock,
|
||||
vk::Format::eAstc10x8SrgbBlock};
|
||||
static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
|
||||
vk::Format::eA8B8G8R8SnormPack32,
|
||||
vk::Format::eA8B8G8R8SrgbPack32,
|
||||
vk::Format::eB5G6R5UnormPack16,
|
||||
vk::Format::eA2B10G10R10UnormPack32,
|
||||
vk::Format::eA1R5G5B5UnormPack16,
|
||||
vk::Format::eR32G32B32A32Sfloat,
|
||||
vk::Format::eR32G32B32A32Uint,
|
||||
vk::Format::eR32G32Sfloat,
|
||||
vk::Format::eR32G32Uint,
|
||||
vk::Format::eR16G16B16A16Uint,
|
||||
vk::Format::eR16G16B16A16Unorm,
|
||||
vk::Format::eR16G16Unorm,
|
||||
vk::Format::eR16G16Snorm,
|
||||
vk::Format::eR16G16Sfloat,
|
||||
vk::Format::eR16Unorm,
|
||||
vk::Format::eR8G8B8A8Srgb,
|
||||
vk::Format::eR8G8Unorm,
|
||||
vk::Format::eR8G8Snorm,
|
||||
vk::Format::eR8Unorm,
|
||||
vk::Format::eR8Uint,
|
||||
vk::Format::eB10G11R11UfloatPack32,
|
||||
vk::Format::eR32Sfloat,
|
||||
vk::Format::eR32Uint,
|
||||
vk::Format::eR16Sfloat,
|
||||
vk::Format::eR16G16B16A16Sfloat,
|
||||
vk::Format::eB8G8R8A8Unorm,
|
||||
vk::Format::eR4G4B4A4UnormPack16,
|
||||
vk::Format::eD32Sfloat,
|
||||
vk::Format::eD16Unorm,
|
||||
vk::Format::eD16UnormS8Uint,
|
||||
vk::Format::eD24UnormS8Uint,
|
||||
vk::Format::eD32SfloatS8Uint,
|
||||
vk::Format::eBc1RgbaUnormBlock,
|
||||
vk::Format::eBc2UnormBlock,
|
||||
vk::Format::eBc3UnormBlock,
|
||||
vk::Format::eBc4UnormBlock,
|
||||
vk::Format::eBc5UnormBlock,
|
||||
vk::Format::eBc5SnormBlock,
|
||||
vk::Format::eBc7UnormBlock,
|
||||
vk::Format::eBc6HUfloatBlock,
|
||||
vk::Format::eBc6HSfloatBlock,
|
||||
vk::Format::eBc1RgbaSrgbBlock,
|
||||
vk::Format::eBc3SrgbBlock,
|
||||
vk::Format::eBc7SrgbBlock,
|
||||
vk::Format::eAstc4x4SrgbBlock,
|
||||
vk::Format::eAstc8x8SrgbBlock,
|
||||
vk::Format::eAstc8x5SrgbBlock,
|
||||
vk::Format::eAstc5x4SrgbBlock,
|
||||
vk::Format::eAstc5x5UnormBlock,
|
||||
vk::Format::eAstc5x5SrgbBlock,
|
||||
vk::Format::eAstc10x8UnormBlock,
|
||||
vk::Format::eAstc10x8SrgbBlock,
|
||||
vk::Format::eAstc6x6UnormBlock,
|
||||
vk::Format::eAstc6x6SrgbBlock,
|
||||
vk::Format::eAstc10x10UnormBlock,
|
||||
vk::Format::eAstc10x10SrgbBlock,
|
||||
vk::Format::eAstc12x12UnormBlock,
|
||||
vk::Format::eAstc12x12SrgbBlock,
|
||||
vk::Format::eAstc8x6UnormBlock,
|
||||
vk::Format::eAstc8x6SrgbBlock,
|
||||
vk::Format::eAstc6x5UnormBlock,
|
||||
vk::Format::eAstc6x5SrgbBlock,
|
||||
vk::Format::eE5B9G9R9UfloatPack32};
|
||||
std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
|
||||
for (const auto format : formats) {
|
||||
format_properties.emplace(format, physical.getFormatProperties(format, dldi));
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
@@ -14,6 +16,9 @@ namespace Vulkan {
|
||||
/// Format usage descriptor.
|
||||
enum class FormatType { Linear, Optimal, Buffer };
|
||||
|
||||
/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
|
||||
const u32 GuestWarpSize = 32;
|
||||
|
||||
/// Handles data specific to a physical device.
|
||||
class VKDevice final {
|
||||
public:
|
||||
@@ -71,7 +76,22 @@ public:
|
||||
|
||||
/// Returns true if the device is integrated with the host CPU.
|
||||
bool IsIntegrated() const {
|
||||
return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
|
||||
return properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
|
||||
}
|
||||
|
||||
/// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
|
||||
u32 GetApiVersion() const {
|
||||
return properties.apiVersion;
|
||||
}
|
||||
|
||||
/// Returns the current driver version provided in Vulkan-formatted version numbers.
|
||||
u32 GetDriverVersion() const {
|
||||
return properties.driverVersion;
|
||||
}
|
||||
|
||||
/// Returns the device name.
|
||||
std::string_view GetModelName() const {
|
||||
return properties.deviceName;
|
||||
}
|
||||
|
||||
/// Returns the driver ID.
|
||||
@@ -80,18 +100,23 @@ public:
|
||||
}
|
||||
|
||||
/// Returns uniform buffer alignment requeriment.
|
||||
u64 GetUniformBufferAlignment() const {
|
||||
return uniform_buffer_alignment;
|
||||
vk::DeviceSize GetUniformBufferAlignment() const {
|
||||
return properties.limits.minUniformBufferOffsetAlignment;
|
||||
}
|
||||
|
||||
/// Returns storage alignment requeriment.
|
||||
u64 GetStorageBufferAlignment() const {
|
||||
return storage_buffer_alignment;
|
||||
vk::DeviceSize GetStorageBufferAlignment() const {
|
||||
return properties.limits.minStorageBufferOffsetAlignment;
|
||||
}
|
||||
|
||||
/// Returns the maximum range for storage buffers.
|
||||
u64 GetMaxStorageBufferRange() const {
|
||||
return max_storage_buffer_range;
|
||||
vk::DeviceSize GetMaxStorageBufferRange() const {
|
||||
return properties.limits.maxStorageBufferRange;
|
||||
}
|
||||
|
||||
/// Returns the maximum size for push constants.
|
||||
vk::DeviceSize GetMaxPushConstantsSize() const {
|
||||
return properties.limits.maxPushConstantsSize;
|
||||
}
|
||||
|
||||
/// Returns true if ASTC is natively supported.
|
||||
@@ -104,6 +129,16 @@ public:
|
||||
return is_float16_supported;
|
||||
}
|
||||
|
||||
/// Returns true if the device warp size can potentially be bigger than guest's warp size.
|
||||
bool IsWarpSizePotentiallyBiggerThanGuest() const {
|
||||
return is_warp_potentially_bigger;
|
||||
}
|
||||
|
||||
/// Returns true if the device can be forced to use the guest warp size.
|
||||
bool IsGuestWarpSizeSupported(vk::ShaderStageFlagBits stage) const {
|
||||
return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_scalar_block_layout.
|
||||
bool IsKhrUniformBufferStandardLayoutSupported() const {
|
||||
return khr_uniform_buffer_standard_layout;
|
||||
@@ -114,6 +149,26 @@ public:
|
||||
return ext_index_type_uint8;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_depth_range_unrestricted.
|
||||
bool IsExtDepthRangeUnrestrictedSupported() const {
|
||||
return ext_depth_range_unrestricted;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_shader_viewport_index_layer.
|
||||
bool IsExtShaderViewportIndexLayerSupported() const {
|
||||
return ext_shader_viewport_index_layer;
|
||||
}
|
||||
|
||||
/// Returns the vendor name reported from Vulkan.
|
||||
std::string_view GetVendorName() const {
|
||||
return vendor_name;
|
||||
}
|
||||
|
||||
/// Returns the list of available extensions.
|
||||
const std::vector<std::string>& GetAvailableExtensions() const {
|
||||
return reported_extensions;
|
||||
}
|
||||
|
||||
/// Checks if the physical device is suitable.
|
||||
static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
|
||||
vk::SurfaceKHR surface);
|
||||
@@ -125,12 +180,12 @@ private:
|
||||
/// Sets up queue families.
|
||||
void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
|
||||
|
||||
/// Sets up device properties.
|
||||
void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
|
||||
|
||||
/// Sets up device features.
|
||||
void SetupFeatures(const vk::DispatchLoaderDynamic& dldi);
|
||||
|
||||
/// Collects telemetry information from the device.
|
||||
void CollectTelemetryParameters();
|
||||
|
||||
/// Returns a list of queue initialization descriptors.
|
||||
std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
|
||||
|
||||
@@ -148,23 +203,28 @@ private:
|
||||
|
||||
const vk::PhysicalDevice physical; ///< Physical device.
|
||||
vk::DispatchLoaderDynamic dld; ///< Device function pointers.
|
||||
vk::PhysicalDeviceProperties properties; ///< Device properties.
|
||||
UniqueDevice logical; ///< Logical device.
|
||||
vk::Queue graphics_queue; ///< Main graphics queue.
|
||||
vk::Queue present_queue; ///< Main present queue.
|
||||
u32 graphics_family{}; ///< Main graphics queue family index.
|
||||
u32 present_family{}; ///< Main present queue family index.
|
||||
vk::PhysicalDeviceType device_type; ///< Physical device type.
|
||||
vk::DriverIdKHR driver_id{}; ///< Driver ID.
|
||||
u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment.
|
||||
u64 storage_buffer_alignment{}; ///< Storage buffer alignment requeriment.
|
||||
u64 max_storage_buffer_range{}; ///< Max storage buffer size.
|
||||
vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
|
||||
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
|
||||
bool is_float16_supported{}; ///< Support for float16 arithmetics.
|
||||
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
|
||||
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
|
||||
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
|
||||
bool khr_driver_properties{}; ///< Support for VK_KHR_driver_properties.
|
||||
std::unordered_map<vk::Format, vk::FormatProperties>
|
||||
format_properties; ///< Format properties dictionary.
|
||||
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
|
||||
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
|
||||
|
||||
// Telemetry parameters
|
||||
std::string vendor_name; ///< Device's driver name.
|
||||
std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
|
||||
|
||||
/// Format properties dictionary.
|
||||
std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,29 +5,28 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <sirit/sirit.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
class ShaderIR;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
class VKDevice;
|
||||
}
|
||||
|
||||
namespace Vulkan::VKShader {
|
||||
namespace Vulkan {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using TexelBufferEntry = VideoCommon::Shader::Sampler;
|
||||
using SamplerEntry = VideoCommon::Shader::Sampler;
|
||||
using ImageEntry = VideoCommon::Shader::Image;
|
||||
|
||||
constexpr u32 DESCRIPTOR_SET = 0;
|
||||
|
||||
@@ -46,39 +45,74 @@ private:
|
||||
|
||||
class GlobalBufferEntry {
|
||||
public:
|
||||
explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset)
|
||||
: cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
|
||||
constexpr explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset, bool is_written)
|
||||
: cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_written{is_written} {}
|
||||
|
||||
u32 GetCbufIndex() const {
|
||||
constexpr u32 GetCbufIndex() const {
|
||||
return cbuf_index;
|
||||
}
|
||||
|
||||
u32 GetCbufOffset() const {
|
||||
constexpr u32 GetCbufOffset() const {
|
||||
return cbuf_offset;
|
||||
}
|
||||
|
||||
constexpr bool IsWritten() const {
|
||||
return is_written;
|
||||
}
|
||||
|
||||
private:
|
||||
u32 cbuf_index{};
|
||||
u32 cbuf_offset{};
|
||||
bool is_written{};
|
||||
};
|
||||
|
||||
struct ShaderEntries {
|
||||
u32 const_buffers_base_binding{};
|
||||
u32 global_buffers_base_binding{};
|
||||
u32 samplers_base_binding{};
|
||||
u32 NumBindings() const {
|
||||
return static_cast<u32>(const_buffers.size() + global_buffers.size() +
|
||||
texel_buffers.size() + samplers.size() + images.size());
|
||||
}
|
||||
|
||||
std::vector<ConstBufferEntry> const_buffers;
|
||||
std::vector<GlobalBufferEntry> global_buffers;
|
||||
std::vector<TexelBufferEntry> texel_buffers;
|
||||
std::vector<SamplerEntry> samplers;
|
||||
std::vector<ImageEntry> images;
|
||||
std::set<u32> attributes;
|
||||
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
||||
std::size_t shader_length{};
|
||||
Sirit::Id entry_function{};
|
||||
std::vector<Sirit::Id> interfaces;
|
||||
bool uses_warps{};
|
||||
};
|
||||
|
||||
using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
|
||||
struct Specialization final {
|
||||
u32 base_binding{};
|
||||
|
||||
DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
Tegra::Engines::ShaderType stage);
|
||||
// Compute specific
|
||||
std::array<u32, 3> workgroup_size{};
|
||||
u32 shared_memory_size{};
|
||||
|
||||
} // namespace Vulkan::VKShader
|
||||
// Graphics specific
|
||||
Maxwell::PrimitiveTopology primitive_topology{};
|
||||
std::optional<float> point_size{};
|
||||
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
|
||||
|
||||
// Tessellation specific
|
||||
struct {
|
||||
Maxwell::TessellationPrimitive primitive{};
|
||||
Maxwell::TessellationSpacing spacing{};
|
||||
bool clockwise{};
|
||||
} tessellation;
|
||||
};
|
||||
// Old gcc versions don't consider this trivially copyable.
|
||||
// static_assert(std::is_trivially_copyable_v<Specialization>);
|
||||
|
||||
struct SPIRVShader {
|
||||
std::vector<u32> code;
|
||||
ShaderEntries entries;
|
||||
};
|
||||
|
||||
ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
|
||||
|
||||
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
Tegra::Engines::ShaderType stage, const Specialization& specialization);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -19,12 +19,18 @@
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats) {
|
||||
|
||||
vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats,
|
||||
bool srgb) {
|
||||
if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
|
||||
return {vk::Format::eB8G8R8A8Unorm, vk::ColorSpaceKHR::eSrgbNonlinear};
|
||||
vk::SurfaceFormatKHR format;
|
||||
format.format = vk::Format::eB8G8R8A8Unorm;
|
||||
format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear;
|
||||
return format;
|
||||
}
|
||||
const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
|
||||
return format.format == vk::Format::eB8G8R8A8Unorm &&
|
||||
const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) {
|
||||
const auto request_format = srgb ? vk::Format::eB8G8R8A8Srgb : vk::Format::eB8G8R8A8Unorm;
|
||||
return format.format == request_format &&
|
||||
format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear;
|
||||
});
|
||||
return found != formats.end() ? *found : formats[0];
|
||||
@@ -51,28 +57,26 @@ vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u3
|
||||
std::min(capabilities.maxImageExtent.height, extent.height));
|
||||
return extent;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device)
|
||||
: surface{surface}, device{device} {}
|
||||
|
||||
VKSwapchain::~VKSwapchain() = default;
|
||||
|
||||
void VKSwapchain::Create(u32 width, u32 height) {
|
||||
const auto dev = device.GetLogical();
|
||||
void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
|
||||
const auto& dld = device.GetDispatchLoader();
|
||||
const auto physical_device = device.GetPhysical();
|
||||
|
||||
const vk::SurfaceCapabilitiesKHR capabilities{
|
||||
physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
|
||||
const auto capabilities{physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
|
||||
if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
dev.waitIdle(dld);
|
||||
device.GetLogical().waitIdle(dld);
|
||||
Destroy();
|
||||
|
||||
CreateSwapchain(capabilities, width, height);
|
||||
CreateSwapchain(capabilities, width, height, srgb);
|
||||
CreateSemaphores();
|
||||
CreateImageViews();
|
||||
|
||||
@@ -107,7 +111,7 @@ bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) {
|
||||
break;
|
||||
case vk::Result::eErrorOutOfDateKHR:
|
||||
if (current_width > 0 && current_height > 0) {
|
||||
Create(current_width, current_height);
|
||||
Create(current_width, current_height, current_srgb);
|
||||
recreated = true;
|
||||
}
|
||||
break;
|
||||
@@ -129,23 +133,19 @@ bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebu
|
||||
}
|
||||
|
||||
void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
|
||||
u32 height) {
|
||||
const auto dev{device.GetLogical()};
|
||||
u32 height, bool srgb) {
|
||||
const auto& dld{device.GetDispatchLoader()};
|
||||
const auto physical_device{device.GetPhysical()};
|
||||
const auto formats{physical_device.getSurfaceFormatsKHR(surface, dld)};
|
||||
const auto present_modes{physical_device.getSurfacePresentModesKHR(surface, dld)};
|
||||
|
||||
const std::vector<vk::SurfaceFormatKHR> formats{
|
||||
physical_device.getSurfaceFormatsKHR(surface, dld)};
|
||||
|
||||
const std::vector<vk::PresentModeKHR> present_modes{
|
||||
physical_device.getSurfacePresentModesKHR(surface, dld)};
|
||||
|
||||
const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
|
||||
const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
|
||||
const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
|
||||
extent = ChooseSwapExtent(capabilities, width, height);
|
||||
|
||||
current_width = extent.width;
|
||||
current_height = extent.height;
|
||||
current_srgb = srgb;
|
||||
|
||||
u32 requested_image_count{capabilities.minImageCount + 1};
|
||||
if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
|
||||
@@ -169,6 +169,7 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
|
||||
swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
|
||||
}
|
||||
|
||||
const auto dev{device.GetLogical()};
|
||||
swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);
|
||||
|
||||
images = dev.getSwapchainImagesKHR(*swapchain, dld);
|
||||
|
||||
@@ -24,7 +24,7 @@ public:
|
||||
~VKSwapchain();
|
||||
|
||||
/// Creates (or recreates) the swapchain with a given size.
|
||||
void Create(u32 width, u32 height);
|
||||
void Create(u32 width, u32 height, bool srgb);
|
||||
|
||||
/// Acquires the next image in the swapchain, waits as needed.
|
||||
void AcquireNextImage();
|
||||
@@ -60,8 +60,13 @@ public:
|
||||
return image_format;
|
||||
}
|
||||
|
||||
bool GetSrgbState() const {
|
||||
return current_srgb;
|
||||
}
|
||||
|
||||
private:
|
||||
void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height);
|
||||
void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
|
||||
bool srgb);
|
||||
void CreateSemaphores();
|
||||
void CreateImageViews();
|
||||
|
||||
@@ -87,6 +92,7 @@ private:
|
||||
|
||||
u32 current_width{};
|
||||
u32 current_height{};
|
||||
bool current_srgb{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -130,6 +130,25 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
|
||||
SetRegister(bb, instr.gpr0, value);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::FLO_R:
|
||||
case OpCode::Id::FLO_C:
|
||||
case OpCode::Id::FLO_IMM: {
|
||||
Node value;
|
||||
if (instr.flo.invert) {
|
||||
op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
|
||||
}
|
||||
if (instr.flo.is_signed) {
|
||||
value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
|
||||
} else {
|
||||
value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
|
||||
}
|
||||
if (instr.flo.sh) {
|
||||
value =
|
||||
Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
|
||||
}
|
||||
SetRegister(bb, instr.gpr0, std::move(value));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::SEL_C:
|
||||
case OpCode::Id::SEL_R:
|
||||
case OpCode::Id::SEL_IMM: {
|
||||
|
||||
@@ -21,6 +21,7 @@ using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Register;
|
||||
|
||||
namespace {
|
||||
|
||||
u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
|
||||
switch (uniform_type) {
|
||||
case Tegra::Shader::UniformType::Single:
|
||||
@@ -35,6 +36,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
@@ -196,28 +198,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
|
||||
"Unaligned attribute loads are not supported");
|
||||
|
||||
u64 next_element = instr.attribute.fmt20.element;
|
||||
auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
|
||||
u64 element = instr.attribute.fmt20.element;
|
||||
auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
|
||||
|
||||
const auto StoreNextElement = [&](u32 reg_offset) {
|
||||
const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
|
||||
next_element, GetRegister(instr.gpr39));
|
||||
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
|
||||
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
|
||||
Node dest;
|
||||
if (instr.attribute.fmt20.patch) {
|
||||
const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
|
||||
dest = MakeNode<PatchNode>(offset);
|
||||
} else {
|
||||
dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
|
||||
GetRegister(instr.gpr39));
|
||||
}
|
||||
const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
|
||||
|
||||
bb.push_back(Operation(OperationCode::Assign, dest, src));
|
||||
|
||||
// Load the next attribute element into the following register. If the element
|
||||
// to load goes beyond the vec4 size, load the first element of the next
|
||||
// attribute.
|
||||
next_element = (next_element + 1) % 4;
|
||||
next_index = next_index + (next_element == 0 ? 1 : 0);
|
||||
};
|
||||
|
||||
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
|
||||
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
|
||||
StoreNextElement(reg_offset);
|
||||
// Load the next attribute element into the following register. If the element to load
|
||||
// goes beyond the vec4 size, load the first element of the next attribute.
|
||||
element = (element + 1) % 4;
|
||||
index = index + (element == 0 ? 1 : 0);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::ST_L:
|
||||
|
||||
@@ -69,6 +69,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||
case OpCode::Id::MOV_SYS: {
|
||||
const Node value = [this, instr] {
|
||||
switch (instr.sys20) {
|
||||
case SystemVariable::InvocationId:
|
||||
return Operation(OperationCode::InvocationId);
|
||||
case SystemVariable::Ydirection:
|
||||
return Operation(OperationCode::YNegate);
|
||||
case SystemVariable::InvocationInfo:
|
||||
|
||||
@@ -134,13 +134,52 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, component, element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
WriteTexsInstructionFloat(bb, instr, values, true);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TXD_B:
|
||||
is_bindless = true;
|
||||
[[fallthrough]];
|
||||
case OpCode::Id::TXD: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.txd.is_array != 0, "TXD Array is not implemented");
|
||||
|
||||
u64 base_reg = instr.gpr8.Value();
|
||||
const auto derivate_reg = instr.gpr20.Value();
|
||||
const auto texture_type = instr.txd.texture_type.Value();
|
||||
const auto coord_count = GetCoordCount(texture_type);
|
||||
|
||||
const auto& sampler = is_bindless
|
||||
? GetBindlessSampler(base_reg, {{texture_type, false, false}})
|
||||
: GetSampler(instr.sampler, {{texture_type, false, false}});
|
||||
if (is_bindless) {
|
||||
base_reg++;
|
||||
}
|
||||
|
||||
std::vector<Node> coords;
|
||||
std::vector<Node> derivates;
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
coords.push_back(GetRegister(base_reg + i));
|
||||
const std::size_t derivate = i * 2;
|
||||
derivates.push_back(GetRegister(derivate_reg + derivate));
|
||||
derivates.push_back(GetRegister(derivate_reg + derivate + 1));
|
||||
}
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
MetaTexture meta{sampler, {}, {}, {}, derivates, {}, {}, {}, element};
|
||||
values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
|
||||
}
|
||||
|
||||
WriteTexInstructionFloat(bb, instr, values);
|
||||
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TXQ_B:
|
||||
is_bindless = true;
|
||||
[[fallthrough]];
|
||||
@@ -158,7 +197,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||
if (!instr.txq.IsComponentEnabled(element)) {
|
||||
continue;
|
||||
}
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element};
|
||||
const Node value =
|
||||
Operation(OperationCode::TextureQueryDimensions, meta,
|
||||
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
|
||||
@@ -212,7 +251,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||
continue;
|
||||
}
|
||||
auto params = coords;
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element};
|
||||
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
|
||||
SetTemporary(bb, indexer++, value);
|
||||
}
|
||||
@@ -442,7 +481,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto copy_coords = coords;
|
||||
MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
|
||||
MetaTexture meta{sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element};
|
||||
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
||||
}
|
||||
|
||||
@@ -574,7 +613,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component,
|
||||
MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component,
|
||||
element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
@@ -608,7 +647,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element};
|
||||
MetaTexture meta{sampler, array_register, {}, {}, {}, {}, lod, {}, element};
|
||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
@@ -653,7 +692,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
|
||||
MetaTexture meta{sampler, array, {}, {}, {}, {}, lod, {}, element};
|
||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||
}
|
||||
return values;
|
||||
|
||||
@@ -38,6 +38,9 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
// Signal the backend that this shader uses warp instructions.
|
||||
uses_warps = true;
|
||||
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::VOTE: {
|
||||
const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
|
||||
|
||||
@@ -68,6 +68,7 @@ enum class OperationCode {
|
||||
IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
|
||||
IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int
|
||||
IBitCount, /// (MetaArithmetic, int) -> int
|
||||
IBitMSB, /// (MetaArithmetic, int) -> int
|
||||
|
||||
UAdd, /// (MetaArithmetic, uint a, uint b) -> uint
|
||||
UMul, /// (MetaArithmetic, uint a, uint b) -> uint
|
||||
@@ -86,6 +87,7 @@ enum class OperationCode {
|
||||
UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
|
||||
UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
|
||||
UBitCount, /// (MetaArithmetic, uint) -> uint
|
||||
UBitMSB, /// (MetaArithmetic, uint) -> uint
|
||||
|
||||
HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
|
||||
HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
|
||||
@@ -149,6 +151,7 @@ enum class OperationCode {
|
||||
TextureQueryDimensions, /// (MetaTexture, float a) -> float4
|
||||
TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
|
||||
TexelFetch, /// (MetaTexture, int[N], int) -> float4
|
||||
TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4
|
||||
|
||||
ImageLoad, /// (MetaImage, int[N] coords) -> void
|
||||
ImageStore, /// (MetaImage, int[N] coords) -> void
|
||||
@@ -169,6 +172,7 @@ enum class OperationCode {
|
||||
EmitVertex, /// () -> void
|
||||
EndPrimitive, /// () -> void
|
||||
|
||||
InvocationId, /// () -> int
|
||||
YNegate, /// () -> float
|
||||
LocalInvocationIdX, /// () -> uint
|
||||
LocalInvocationIdY, /// () -> uint
|
||||
@@ -210,13 +214,14 @@ class PredicateNode;
|
||||
class AbufNode;
|
||||
class CbufNode;
|
||||
class LmemNode;
|
||||
class PatchNode;
|
||||
class SmemNode;
|
||||
class GmemNode;
|
||||
class CommentNode;
|
||||
|
||||
using NodeData =
|
||||
std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
|
||||
PredicateNode, AbufNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>;
|
||||
using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode,
|
||||
InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
|
||||
LmemNode, SmemNode, GmemNode, CommentNode>;
|
||||
using Node = std::shared_ptr<NodeData>;
|
||||
using Node4 = std::array<Node, 4>;
|
||||
using NodeBlock = std::vector<Node>;
|
||||
@@ -367,6 +372,7 @@ struct MetaTexture {
|
||||
Node array;
|
||||
Node depth_compare;
|
||||
std::vector<Node> aoffi;
|
||||
std::vector<Node> derivates;
|
||||
Node bias;
|
||||
Node lod;
|
||||
Node component{};
|
||||
@@ -538,6 +544,19 @@ private:
|
||||
u32 element{};
|
||||
};
|
||||
|
||||
/// Patch memory (used to communicate tessellation stages).
|
||||
class PatchNode final {
|
||||
public:
|
||||
explicit PatchNode(u32 offset) : offset{offset} {}
|
||||
|
||||
u32 GetOffset() const {
|
||||
return offset;
|
||||
}
|
||||
|
||||
private:
|
||||
u32 offset{};
|
||||
};
|
||||
|
||||
/// Constant buffer node, usually mapped to uniform buffers in GLSL
|
||||
class CbufNode final {
|
||||
public:
|
||||
|
||||
@@ -137,6 +137,10 @@ public:
|
||||
return uses_vertex_id;
|
||||
}
|
||||
|
||||
bool UsesWarps() const {
|
||||
return uses_warps;
|
||||
}
|
||||
|
||||
bool HasPhysicalAttributes() const {
|
||||
return uses_physical_attributes;
|
||||
}
|
||||
@@ -415,6 +419,7 @@ private:
|
||||
bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
|
||||
bool uses_instance_id{};
|
||||
bool uses_vertex_id{};
|
||||
bool uses_warps{};
|
||||
|
||||
Tegra::Shader::Header header;
|
||||
};
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <variant>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/shader/node.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
Reference in New Issue
Block a user