Compare commits

..

5 Commits

Author SHA1 Message Date
Billy Laws
2dbef58eeb Fixup channel submit IOCTL syncpoint parameters
The current arguments worked by happenstance as games only ever submit
one syncpoint and request one fence back, if a game were to do something
other than this then the arguments would've been parsed entirely wrong.
2021-10-24 00:01:35 +01:00
Ameer J
494e34af6a Merge pull request #7070 from FernandoS27/want-you-bad
Vulkan Rasterizer: Correct DepthBias/PolygonOffset on Vulkan.
2021-10-23 18:02:23 -04:00
Fernando Sahmkow
ad8afaf1ef Vulran Rasterizer: address feedback. 2021-10-23 23:46:29 +02:00
Narr the Reg
2686bf6734 Merge pull request #7217 from yuzu-emu/revert-6515-gc_thread_safe
Revert "input_common: Fix data race on GC implementation"
2021-10-23 16:41:09 -05:00
Fernando Sahmkow
60a3980561 Vulkan Rasterizer: Correct DepthBias/PolygonOffset on Vulkan. 2021-09-23 03:49:10 +02:00
8 changed files with 40 additions and 24 deletions

View File

@@ -69,8 +69,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
std::vector<Reloc> relocs(params.relocation_count);
std::vector<u32> reloc_shifts(params.relocation_count);
std::vector<SyncptIncr> syncpt_increments(params.syncpoint_count);
std::vector<SyncptIncr> wait_checks(params.syncpoint_count);
std::vector<Fence> fences(params.fence_count);
std::vector<u32> fence_thresholds(params.fence_count);
// Slice input into their respective buffers
std::size_t offset = sizeof(IoctlSubmit);
@@ -78,15 +77,13 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
offset += SliceVectors(input, relocs, params.relocation_count, offset);
offset += SliceVectors(input, reloc_shifts, params.relocation_count, offset);
offset += SliceVectors(input, syncpt_increments, params.syncpoint_count, offset);
offset += SliceVectors(input, wait_checks, params.syncpoint_count, offset);
offset += SliceVectors(input, fences, params.fence_count, offset);
offset += SliceVectors(input, fence_thresholds, params.fence_count, offset);
auto& gpu = system.GPU();
if (gpu.UseNvdec()) {
for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
const SyncptIncr& syncpt_incr = syncpt_increments[i];
fences[i].id = syncpt_incr.id;
fences[i].value =
fence_thresholds[i] =
syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments);
}
}
@@ -98,11 +95,6 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
cmdlist.size() * sizeof(u32));
gpu.PushCommandBuffer(cmdlist);
}
if (gpu.UseNvdec()) {
fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1);
Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}};
gpu.PushCommandBuffer(cmdlist);
}
std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
// Some games expect command_buffers to be written back
offset = sizeof(IoctlSubmit);
@@ -110,8 +102,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
offset += WriteVectors(output, relocs, offset);
offset += WriteVectors(output, reloc_shifts, offset);
offset += WriteVectors(output, syncpt_increments, offset);
offset += WriteVectors(output, wait_checks, offset);
offset += WriteVectors(output, fences, offset);
offset += WriteVectors(output, fence_thresholds, offset);
return NvResult::Success;
}

View File

@@ -56,19 +56,16 @@ protected:
s32 target{};
s32 target_offset{};
};
static_assert(sizeof(Reloc) == 0x10, "CommandBuffer has incorrect size");
static_assert(sizeof(Reloc) == 0x10, "Reloc has incorrect size");
struct SyncptIncr {
u32 id{};
u32 increments{};
u32 unk0{};
u32 unk1{};
u32 unk2{};
};
static_assert(sizeof(SyncptIncr) == 0x8, "CommandBuffer has incorrect size");
struct Fence {
u32 id{};
u32 value{};
};
static_assert(sizeof(Fence) == 0x8, "CommandBuffer has incorrect size");
static_assert(sizeof(SyncptIncr) == 0x14, "SyncptIncr has incorrect size");
struct IoctlGetSyncpoint {
// Input

View File

@@ -38,6 +38,9 @@ enum : u8 {
Shaders,
// Special entries
DepthBiasGlobal,
LastCommonEntry,
};

View File

@@ -627,9 +627,21 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthBias()) {
return;
}
scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp,
float units = regs.polygon_offset_units / 2.0f;
const bool is_d24 = regs.zeta.format == Tegra::DepthFormat::S8_UINT_Z24_UNORM ||
regs.zeta.format == Tegra::DepthFormat::D24X8_UNORM ||
regs.zeta.format == Tegra::DepthFormat::D24S8_UNORM ||
regs.zeta.format == Tegra::DepthFormat::D24C8_UNORM;
if (is_d24 && !device.SupportsD24DepthBuffer()) {
// the base formulas can be obtained from here:
// https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage-depth-bias
const double rescale_factor =
static_cast<double>(1ULL << (32 - 24)) / (static_cast<double>(0x1.ep+127));
units = static_cast<float>(static_cast<double>(units) * rescale_factor);
}
scheduler.Record([constant = units, clamp = regs.polygon_offset_clamp,
factor = regs.polygon_offset_factor](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBias(constant, clamp, factor / 2.0f);
cmdbuf.SetDepthBias(constant, clamp, factor);
});
}

View File

@@ -79,7 +79,8 @@ public:
}
bool TouchDepthBias() {
return Exchange(Dirty::DepthBias, false);
return Exchange(Dirty::DepthBias, false) ||
Exchange(VideoCommon::Dirty::DepthBiasGlobal, false);
}
bool TouchBlendConstants() {

View File

@@ -221,6 +221,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
}
const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
for (size_t index = 0; index < NUM_RT; ++index) {
@@ -230,6 +231,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
maxwell3d.regs.render_area.width,
maxwell3d.regs.render_area.height,
};
flags[Dirty::DepthBiasGlobal] = true;
}
template <class P>

View File

@@ -623,6 +623,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
is_float16_supported = false;
}
supports_d24_depth =
IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal);
graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family);
}

View File

@@ -332,6 +332,10 @@ public:
return sets_per_pool;
}
bool SupportsD24DepthBuffer() const {
return supports_d24_depth;
}
private:
/// Checks if the physical device is suitable.
void CheckSuitability(bool requires_swapchain) const;
@@ -425,6 +429,7 @@ private:
bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.