Compare commits

..

1 Commits

Author SHA1 Message Date
Fernando Sahmkow
c03f0b3c89 Shader Recomnpiler: implement textuzreGrad 3D emulation constant propagation 2023-08-18 22:17:02 -04:00
20 changed files with 281 additions and 176 deletions

View File

@@ -38,7 +38,7 @@ namespace {
*/
void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority,
KProcessAddress stack_top) {
const KProcessAddress entry_point = owner_process.GetEntryPoint();
const KProcessAddress entry_point = owner_process.GetPageTable().GetCodeRegionStart();
ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::ThreadCountMax, 1));
KThread* thread = KThread::Create(system.Kernel());
@@ -358,21 +358,6 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std:
m_system_resource_size = metadata.GetSystemResourceSize();
m_image_size = code_size;
if (metadata.GetAddressSpaceType() == FileSys::ProgramAddressSpaceType::Is39Bit) {
// For 39-bit processes, the ASLR region starts at 0x800'0000 and is ~512GiB large.
// However, some (buggy) programs/libraries like skyline incorrectly depend on the
// existence of ASLR pages before the entry point, so we will adjust the load address
// to point to about 2GiB into the ASLR region.
m_code_address = 0x8000'0000;
} else {
// All other processes can be mapped at the beginning of the code region.
if (metadata.GetAddressSpaceType() == FileSys::ProgramAddressSpaceType::Is36Bit) {
m_code_address = 0x800'0000;
} else {
m_code_address = 0x20'0000;
}
}
KScopedResourceReservation memory_reservation(
m_resource_limit, LimitableResource::PhysicalMemoryMax, code_size + m_system_resource_size);
if (!memory_reservation.Succeeded()) {
@@ -383,15 +368,15 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std:
// Initialize process address space
if (const Result result{m_page_table.InitializeForProcess(
metadata.GetAddressSpaceType(), false, false, false, KMemoryManager::Pool::Application,
this->GetEntryPoint(), code_size, std::addressof(m_kernel.GetAppSystemResource()),
m_resource_limit, m_kernel.System().ApplicationMemory())};
0x8000000, code_size, std::addressof(m_kernel.GetAppSystemResource()), m_resource_limit,
m_kernel.System().ApplicationMemory())};
result.IsError()) {
R_RETURN(result);
}
// Map process code region
if (const Result result{m_page_table.MapProcessCode(this->GetEntryPoint(), code_size / PageSize,
KMemoryState::Code,
if (const Result result{m_page_table.MapProcessCode(m_page_table.GetCodeRegionStart(),
code_size / PageSize, KMemoryState::Code,
KMemoryPermission::None)};
result.IsError()) {
R_RETURN(result);

View File

@@ -177,10 +177,6 @@ public:
return m_program_id;
}
KProcessAddress GetEntryPoint() const {
return m_code_address;
}
/// Gets the resource limit descriptor for this process
KResourceLimit* GetResourceLimit() const;
@@ -489,9 +485,6 @@ private:
/// Address indicating the location of the process' dedicated TLS region.
KProcessAddress m_plr_address = 0;
/// Address indicating the location of the process's entry point.
KProcessAddress m_code_address = 0;
/// Random values for svcGetInfo RandomEntropy
std::array<u64, RANDOM_ENTROPY_SIZE> m_random_entropy{};

View File

@@ -153,7 +153,7 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
// Load NSO modules
modules.clear();
const VAddr base_address{GetInteger(process.GetEntryPoint())};
const VAddr base_address{GetInteger(process.GetPageTable().GetCodeRegionStart())};
VAddr next_load_addr{base_address};
const FileSys::PatchManager pm{metadata.GetTitleID(), system.GetFileSystemController(),
system.GetContentProvider()};

View File

@@ -96,7 +96,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::KProcess& process,
}
codeset.memory = std::move(program_image);
const VAddr base_address = GetInteger(process.GetEntryPoint());
const VAddr base_address = GetInteger(process.GetPageTable().GetCodeRegionStart());
process.LoadModule(std::move(codeset), base_address);
LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", kip->GetName(), base_address);

View File

@@ -203,7 +203,7 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data)
// Load codeset for current process
codeset.memory = std::move(program_image);
process.LoadModule(std::move(codeset), process.GetEntryPoint());
process.LoadModule(std::move(codeset), process.GetPageTable().GetCodeRegionStart());
return true;
}

View File

@@ -167,7 +167,7 @@ AppLoader_NSO::LoadResult AppLoader_NSO::Load(Kernel::KProcess& process, Core::S
modules.clear();
// Load module
const VAddr base_address = GetInteger(process.GetEntryPoint());
const VAddr base_address = GetInteger(process.GetPageTable().GetCodeRegionStart());
if (!LoadModule(process, system, *file, base_address, true, true)) {
return {ResultStatus::ErrorLoadingNSO, {}};
}

View File

@@ -117,8 +117,8 @@ json GetProcessorStateDataAuto(Core::System& system) {
arm.SaveContext(context);
return GetProcessorStateData(process->Is64BitProcess() ? "AArch64" : "AArch32",
GetInteger(process->GetEntryPoint()), context.sp, context.pc,
context.pstate, context.cpu_registers);
GetInteger(process->GetPageTable().GetCodeRegionStart()),
context.sp, context.pc, context.pstate, context.cpu_registers);
}
json GetBacktraceData(Core::System& system) {

View File

@@ -558,12 +558,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& derivatives,
const IR::Value& offset, const IR::Value& lod_clamp) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
ScopedRegister dpdx, dpdy;
ScopedRegister dpdx, dpdy, coords;
const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
if (multi_component) {
// Allocate this early to avoid aliasing other registers
dpdx = ScopedRegister{ctx.reg_alloc};
dpdy = ScopedRegister{ctx.reg_alloc};
if (info.num_derivates >= 3) {
coords = ScopedRegister{ctx.reg_alloc};
}
}
const auto sparse_inst{PrepareSparse(inst)};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
@@ -580,15 +583,27 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
"MOV.F {}.y,{}.w;",
dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
dpdy.reg, derivatives_vec);
Register final_coord;
if (info.num_derivates >= 3) {
ctx.Add("MOV.F {}.z,{}.x;"
"MOV.F {}.z,{}.y;",
dpdx.reg, coord_vec, dpdy.reg, coord_vec);
ctx.Add("MOV.F {}.x,0;"
"MOV.F {}.y,0;",
"MOV.F {}.z,0;", coords.reg, coords.reg, coords.reg);
final_coord = coords.reg;
} else {
final_coord = coord_vec;
}
if (info.has_lod_clamp) {
const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)};
ctx.Add("MOV.F {}.w,{};"
"TXD.F.LODCLAMP{} {},{},{},{},{},{}{};",
dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
dpdy.reg, lod_clamp_value, sparse_mod, ret, final_coord, dpdx.reg, dpdy.reg,
texture, type, offset_vec);
} else {
ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
texture, type, offset_vec);
ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, final_coord, dpdx.reg,
dpdy.reg, texture, type, offset_vec);
}
} else {
ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec,

View File

@@ -548,7 +548,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
if (sparse_inst) {
throw NotImplementedException("EmitImageGradient Sparse");
}
if (!offset.IsEmpty()) {
if (!offset.IsEmpty() && info.num_derivates <= 2) {
throw NotImplementedException("EmitImageGradient offset");
}
const auto texture{Texture(ctx, info, index)};
@@ -556,6 +556,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
if (multi_component) {
if (info.num_derivates >= 3) {
const auto offset_vec{ctx.var_alloc.Consume(offset)};
ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yz, {}.y));", texel, texture,
coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
return;
}
ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords,
derivatives_vec, derivatives_vec);
} else {

View File

@@ -42,6 +42,7 @@ union TextureInstInfo {
BitField<23, 2, u32> gather_component;
BitField<25, 2, u32> num_derivates;
BitField<27, 3, ImageFormat> image_format;
BitField<30, 1, u32> ndv_is_active;
};
static_assert(sizeof(TextureInstInfo) <= sizeof(u32));

View File

@@ -19,7 +19,7 @@ void TranslatorVisitor::FSWZADD(u64 insn) {
} const fswzadd{insn};
if (fswzadd.ndv != 0) {
throw NotImplementedException("FSWZADD NDV");
LOG_WARNING(Shader, "(STUBBED) FSWZADD - NDV mode");
}
const IR::F32 src_a{GetFloatReg8(insn)};

View File

@@ -16,8 +16,10 @@ void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = fa
BitField<12, 4, u64> mov32i_mask;
} const mov{insn};
if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) {
throw NotImplementedException("Non-full move mask");
u64 mask = is_mov32i ? mov.mov32i_mask : mov.mask;
if (mask != 0xf && mask != 0x1) {
LOG_WARNING(Shader, "(STUBBED) Masked Mov");
return;
}
v.X(mov.dest_reg, src);
}

View File

@@ -209,7 +209,7 @@ void TranslatorVisitor::R2B(u64) {
}
void TranslatorVisitor::RAM(u64) {
ThrowNotImplemented(Opcode::RAM);
LOG_WARNING(Shader, "(STUBBED) RAM Instruction");
}
void TranslatorVisitor::RET(u64) {
@@ -221,7 +221,7 @@ void TranslatorVisitor::RTT(u64) {
}
void TranslatorVisitor::SAM(u64) {
ThrowNotImplemented(Opcode::SAM);
LOG_WARNING(Shader, "(STUBBED) SAM Instruction");
}
void TranslatorVisitor::SETCRSPTR(u64) {

View File

@@ -172,6 +172,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
info.has_lod_clamp.Assign(lc ? 1 : 0);
info.ndv_is_active.Assign(tex.ndv != 0 ? 1 : 0);
const IR::Value sample{[&]() -> IR::Value {
if (tex.dc == 0) {

View File

@@ -10,6 +10,7 @@
#include "shader_recompiler/environment.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"
@@ -410,7 +411,49 @@ void FoldSelect(IR::Inst& inst) {
}
}
void FoldFPAdd32(IR::Inst& inst) {
if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a + b; })) {
return;
}
const IR::Value lhs_value{inst.Arg(0)};
const IR::Value rhs_value{inst.Arg(1)};
const auto check_neutral = [](const IR::Value& one_operand) {
return one_operand.IsImmediate() && std::abs(one_operand.F32()) == 0.0f;
};
if (check_neutral(lhs_value)) {
inst.ReplaceUsesWith(rhs_value);
}
if (check_neutral(rhs_value)) {
inst.ReplaceUsesWith(lhs_value);
}
}
bool FoldDerivateYFromCorrection(IR::Inst& inst) {
const IR::Value lhs_value{inst.Arg(0)};
const IR::Value rhs_value{inst.Arg(1)};
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
IR::Inst* const rhs_op{rhs_value.InstRecursive()};
if (lhs_op->GetOpcode() == IR::Opcode::YDirection) {
if (rhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
return false;
}
inst.ReplaceUsesWith(rhs_value);
return true;
}
if (rhs_op->GetOpcode() != IR::Opcode::YDirection) {
return false;
}
if (lhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
return false;
}
inst.ReplaceUsesWith(lhs_value);
return true;
}
void FoldFPMul32(IR::Inst& inst) {
if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a * b; })) {
return;
}
const auto control{inst.Flags<IR::FpControl>()};
if (control.no_contraction) {
return;
@@ -421,6 +464,9 @@ void FoldFPMul32(IR::Inst& inst) {
if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
return;
}
if (FoldDerivateYFromCorrection(inst)) {
return;
}
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
IR::Inst* const rhs_op{rhs_value.InstRecursive()};
if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
@@ -622,7 +668,12 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
}
const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)};
if (value_2 != value_3) {
return;
if (!value_2.IsImmediate() || !value_3.IsImmediate()) {
return;
}
if (Common::BitCast<u32>(value_2.F32()) != value_3.U32()) {
return;
}
}
const IR::Value index{inst2->Arg(1)};
const IR::Value clamp{inst2->Arg(2)};
@@ -648,6 +699,169 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
}
}
bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
if (coord.IsImmediate()) {
return false;
}
const auto check_through_shuffle = [](IR::Value input, IR::Value& result) {
const IR::Value value_1{GetThroughCast(input.Resolve(), IR::Opcode::BitCastF32U32)};
IR::Inst* const inst2{value_1.InstRecursive()};
if (inst2->GetOpcode() != IR::Opcode::ShuffleIndex) {
return false;
}
const IR::Value index{inst2->Arg(1).Resolve()};
const IR::Value clamp{inst2->Arg(2).Resolve()};
const IR::Value segmentation_mask{inst2->Arg(3).Resolve()};
if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) {
return false;
}
if (index.U32() != 3 && clamp.U32() != 3) {
return false;
}
result = GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32);
return true;
};
IR::Inst* const inst = coord.InstRecursive();
if (inst->GetOpcode() != IR::Opcode::FSwizzleAdd) {
return false;
}
std::array<IR::Value, 3> temporary_values;
IR::Value value_1 = inst->Arg(0).Resolve();
IR::Value value_2 = inst->Arg(1).Resolve();
IR::Value value_3 = inst->Arg(2).Resolve();
std::array<u32, 4> swizzles_mask_a{};
std::array<u32, 4> swizzles_mask_b{};
const auto resolve_mask = [](std::array<u32, 4>& mask_results, IR::Value mask) {
u32 value = mask.U32();
for (size_t i = 0; i < 4; i++) {
mask_results[i] = (value >> (i * 2)) & 0x3;
}
};
resolve_mask(swizzles_mask_a, value_3);
size_t coordinate_index = 0;
const auto resolve_pending = [&](IR::Value resolve_v) {
IR::Inst* const inst_r = resolve_v.InstRecursive();
if (inst_r->GetOpcode() != IR::Opcode::FSwizzleAdd) {
return false;
}
if (!check_through_shuffle(inst_r->Arg(0).Resolve(), temporary_values[1])) {
return false;
}
if (!check_through_shuffle(inst_r->Arg(1).Resolve(), temporary_values[2])) {
return false;
}
resolve_mask(swizzles_mask_b, inst_r->Arg(2).Resolve());
return true;
};
if (value_1.IsImmediate() || value_2.IsImmediate()) {
return false;
}
bool should_continue = false;
if (resolve_pending(value_1)) {
should_continue = check_through_shuffle(value_2, temporary_values[0]);
coordinate_index = 0;
}
if (resolve_pending(value_2)) {
should_continue = check_through_shuffle(value_1, temporary_values[0]);
coordinate_index = 2;
}
if (!should_continue) {
return false;
}
// figure which is which
size_t zero_mask_a = 0;
size_t zero_mask_b = 0;
for (size_t i = 0; i < 4; i++) {
if (swizzles_mask_a[i] == 2 || swizzles_mask_b[i] == 2) {
// last operand can be inversed, we cannot determine a result.
return false;
}
zero_mask_a |= static_cast<size_t>(swizzles_mask_a[i] == 3 ? 1 : 0) << i;
zero_mask_b |= static_cast<size_t>(swizzles_mask_b[i] == 3 ? 1 : 0) << i;
}
static constexpr size_t ddx_pattern = 0b1010;
static constexpr size_t ddx_pattern_inv = ~ddx_pattern & 0b00001111;
if (std::popcount(zero_mask_a) != 2) {
return false;
}
if (std::popcount(zero_mask_b) != 2) {
return false;
}
if (zero_mask_a == zero_mask_b) {
return false;
}
results[0] = temporary_values[coordinate_index];
if (coordinate_index == 0) {
if (zero_mask_b == ddx_pattern || zero_mask_b == ddx_pattern_inv) {
results[1] = temporary_values[1];
results[2] = temporary_values[2];
return true;
}
results[2] = temporary_values[1];
results[1] = temporary_values[2];
} else {
const auto assign_result = [&results](IR::Value temporary_value, size_t mask) {
if (mask == ddx_pattern || mask == ddx_pattern_inv) {
results[1] = temporary_value;
return;
}
results[2] = temporary_value;
};
assign_result(temporary_values[1], zero_mask_b);
assign_result(temporary_values[0], zero_mask_a);
}
return true;
}
void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
IR::TextureInstInfo info = inst.Flags<IR::TextureInstInfo>();
auto orig_opcode = inst.GetOpcode();
if (info.ndv_is_active == 0) {
return;
}
if (info.type != TextureType::Color3D) {
return;
}
const IR::Value handle{inst.Arg(0)};
const IR::Value coords{inst.Arg(1)};
const IR::Value bias_lc{inst.Arg(2)};
const IR::Value offset{inst.Arg(3)};
if (!offset.IsImmediate()) {
return;
}
IR::Inst* const inst2 = coords.InstRecursive();
std::array<std::array<IR::Value, 3>, 3> results_matrix;
for (size_t i = 0; i < 3; i++) {
if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
return;
}
}
IR::F32 lod_clamp{};
if (info.has_lod_clamp != 0) {
if (!bias_lc.IsImmediate()) {
lod_clamp = IR::F32{bias_lc.InstRecursive()->Arg(1).Resolve()};
} else {
lod_clamp = IR::F32{bias_lc};
}
}
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
IR::Value new_coords =
ir.CompositeConstruct(results_matrix[0][0], results_matrix[1][0], results_matrix[2][0]);
IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
results_matrix[1][1], results_matrix[1][2]);
IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
info.num_derivates.Assign(3);
IR::Value new_gradient_instruction =
ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
if (orig_opcode == IR::Opcode::ImageSampleImplicitLod) {
new_inst->ReplaceOpcode(IR::Opcode::ImageGradient);
}
inst.ReplaceUsesWith(new_gradient_instruction);
}
void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
const IR::Value bank{inst.Arg(0)};
const IR::Value offset{inst.Arg(1)};
@@ -743,6 +957,12 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
case IR::Opcode::SelectF32:
case IR::Opcode::SelectF64:
return FoldSelect(inst);
case IR::Opcode::FPNeg32:
FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
return;
case IR::Opcode::FPAdd32:
FoldFPAdd32(inst);
return;
case IR::Opcode::FPMul32:
return FoldFPMul32(inst);
case IR::Opcode::LogicalAnd:
@@ -858,6 +1078,11 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
FoldDriverConstBuffer(env, block, inst, 1);
}
break;
case IR::Opcode::BindlessImageSampleImplicitLod:
case IR::Opcode::BoundImageSampleImplicitLod:
case IR::Opcode::ImageSampleImplicitLod:
FoldImageSampleImplicitLod(block, inst);
break;
default:
break;
}

View File

@@ -50,7 +50,6 @@ set(SHADER_FILES
vulkan_blit_depth_stencil.frag
vulkan_color_clear.frag
vulkan_color_clear.vert
vulkan_depthstencil_clear.frag
vulkan_fidelityfx_fsr_easu_fp16.comp
vulkan_fidelityfx_fsr_easu_fp32.comp
vulkan_fidelityfx_fsr_rcas_fp16.comp

View File

@@ -1,12 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#version 460 core
layout (push_constant) uniform PushConstants {
vec4 clear_depth;
};
void main() {
gl_FragDepth = clear_depth.x;
}

View File

@@ -16,7 +16,6 @@
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
#include "video_core/host_shaders/vulkan_color_clear_frag_spv.h"
#include "video_core/host_shaders/vulkan_color_clear_vert_spv.h"
#include "video_core/host_shaders/vulkan_depthstencil_clear_frag_spv.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -429,7 +428,6 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)),
clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)),
clear_stencil_frag(BuildShader(device, VULKAN_DEPTHSTENCIL_CLEAR_FRAG_SPV)),
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
@@ -595,28 +593,6 @@ void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_ma
scheduler.InvalidateState();
}
void BlitImageHelper::ClearDepthStencil(const Framebuffer* dst_framebuffer, bool depth_clear,
f32 clear_depth, u8 stencil_mask, u32 stencil_ref,
u32 stencil_compare_mask, const Region2D& dst_region) {
const BlitDepthStencilPipelineKey key{
.renderpass = dst_framebuffer->RenderPass(),
.depth_clear = depth_clear,
.stencil_mask = stencil_mask,
.stencil_compare_mask = stencil_compare_mask,
.stencil_ref = stencil_ref,
};
const VkPipeline pipeline = FindOrEmplaceClearStencilPipeline(key);
const VkPipelineLayout layout = *clear_color_pipeline_layout;
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
BindBlitState(cmdbuf, dst_region);
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth);
cmdbuf.Draw(3, 1, 0, 0);
});
scheduler.InvalidateState();
}
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
@@ -844,61 +820,6 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearColorPipeline(const BlitImagePipel
return *clear_color_pipelines.back();
}
VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline(
const BlitDepthStencilPipelineKey& key) {
const auto it = std::ranges::find(clear_stencil_keys, key);
if (it != clear_stencil_keys.end()) {
return *clear_stencil_pipelines[std::distance(clear_stencil_keys.begin(), it)];
}
clear_stencil_keys.push_back(key);
const std::array stages = MakeStages(*clear_color_vert, *clear_stencil_frag);
const auto stencil = VkStencilOpState{
.failOp = VK_STENCIL_OP_KEEP,
.passOp = VK_STENCIL_OP_REPLACE,
.depthFailOp = VK_STENCIL_OP_KEEP,
.compareOp = VK_COMPARE_OP_ALWAYS,
.compareMask = key.stencil_compare_mask,
.writeMask = key.stencil_mask,
.reference = key.stencil_ref,
};
const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.depthTestEnable = VK_FALSE,
.depthWriteEnable = key.depth_clear,
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
.depthBoundsTestEnable = VK_FALSE,
.stencilTestEnable = VK_TRUE,
.front = stencil,
.back = stencil,
.minDepthBounds = 0.0f,
.maxDepthBounds = 0.0f,
};
clear_stencil_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(stages.size()),
.pStages = stages.data(),
.pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pTessellationState = nullptr,
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pDepthStencilState = &depth_stencil_ci,
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.layout = *clear_color_pipeline_layout,
.renderPass = key.renderpass,
.subpass = 0,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
}));
return *clear_stencil_pipelines.back();
}
void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass,
bool is_target_depth) {
if (pipeline) {

View File

@@ -27,16 +27,6 @@ struct BlitImagePipelineKey {
Tegra::Engines::Fermi2D::Operation operation;
};
struct BlitDepthStencilPipelineKey {
constexpr auto operator<=>(const BlitDepthStencilPipelineKey&) const noexcept = default;
VkRenderPass renderpass;
bool depth_clear;
u8 stencil_mask;
u32 stencil_compare_mask;
u32 stencil_ref;
};
class BlitImageHelper {
public:
explicit BlitImageHelper(const Device& device, Scheduler& scheduler,
@@ -74,10 +64,6 @@ public:
void ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask,
const std::array<f32, 4>& clear_color, const Region2D& dst_region);
void ClearDepthStencil(const Framebuffer* dst_framebuffer, bool depth_clear, f32 clear_depth,
u8 stencil_mask, u32 stencil_ref, u32 stencil_compare_mask,
const Region2D& dst_region);
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
@@ -90,8 +76,6 @@ private:
[[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
[[nodiscard]] VkPipeline FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key);
[[nodiscard]] VkPipeline FindOrEmplaceClearStencilPipeline(
const BlitDepthStencilPipelineKey& key);
void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth);
@@ -124,7 +108,6 @@ private:
vk::ShaderModule blit_depth_stencil_frag;
vk::ShaderModule clear_color_vert;
vk::ShaderModule clear_color_frag;
vk::ShaderModule clear_stencil_frag;
vk::ShaderModule convert_depth_to_float_frag;
vk::ShaderModule convert_float_to_depth_frag;
vk::ShaderModule convert_abgr8_to_d24s8_frag;
@@ -139,8 +122,6 @@ private:
std::vector<vk::Pipeline> blit_depth_stencil_pipelines;
std::vector<BlitImagePipelineKey> clear_color_keys;
std::vector<vk::Pipeline> clear_color_pipelines;
std::vector<BlitDepthStencilPipelineKey> clear_stencil_keys;
std::vector<vk::Pipeline> clear_stencil_pipelines;
vk::Pipeline convert_d32_to_r32_pipeline;
vk::Pipeline convert_r32_to_d32_pipeline;
vk::Pipeline convert_d16_to_r16_pipeline;

View File

@@ -428,27 +428,15 @@ void RasterizerVulkan::Clear(u32 layer_count) {
if (aspect_flags == 0) {
return;
}
if (use_stencil && regs.stencil_front_mask != 0xFF && regs.stencil_front_mask != 0) {
Region2D dst_region = {
Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
Offset2D{.x = clear_rect.rect.offset.x + static_cast<s32>(clear_rect.rect.extent.width),
.y = clear_rect.rect.offset.y +
static_cast<s32>(clear_rect.rect.extent.height)}};
blit_image.ClearDepthStencil(framebuffer, use_depth, regs.clear_depth,
static_cast<u8>(regs.stencil_front_mask), regs.clear_stencil,
regs.stencil_front_func_mask, dst_region);
} else {
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
VkClearAttachment attachment;
attachment.aspectMask = aspect_flags;
attachment.colorAttachment = 0;
attachment.clearValue.depthStencil.depth = clear_depth;
attachment.clearValue.depthStencil.stencil = clear_stencil;
cmdbuf.ClearAttachments(attachment, clear_rect);
});
}
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
VkClearAttachment attachment;
attachment.aspectMask = aspect_flags;
attachment.colorAttachment = 0;
attachment.clearValue.depthStencil.depth = clear_depth;
attachment.clearValue.depthStencil.stencil = clear_stencil;
cmdbuf.ClearAttachments(attachment, clear_rect);
});
}
void RasterizerVulkan::DispatchCompute() {