Merge branch 'yuzu-emu:master' into master

Merge pull request #7218 from bylaws/aswdqdsam
Fixup channel submit IOCTL syncpoint parameters
2021-10-25 11:18:18 +02:00 · 2021-10-24 19:35:00 -04:00 · 2021-10-24 15:16:31 -04:00 · 2021-10-24 20:46:36 +02:00 · 2021-10-24 14:43:40 -04:00 · 2021-10-24 11:16:32 -04:00
13 changed files with 49 additions and 30 deletions
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -263,7 +263,7 @@ void ARM_Dynarmic_64::Run() {
 }

 void ARM_Dynarmic_64::Step() {
-    cb->InterpreterFallback(jit->GetPC(), 1);
+    jit->Step();
 }

 ARM_Dynarmic_64::ARM_Dynarmic_64(System& system_, CPUInterrupts& interrupt_handlers_,
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -69,8 +69,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
    std::vector<Reloc> relocs(params.relocation_count);
    std::vector<u32> reloc_shifts(params.relocation_count);
    std::vector<SyncptIncr> syncpt_increments(params.syncpoint_count);
-    std::vector<SyncptIncr> wait_checks(params.syncpoint_count);
-    std::vector<Fence> fences(params.fence_count);
+    std::vector<u32> fence_thresholds(params.fence_count);

    // Slice input into their respective buffers
    std::size_t offset = sizeof(IoctlSubmit);
@@ -78,15 +77,13 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
    offset += SliceVectors(input, relocs, params.relocation_count, offset);
    offset += SliceVectors(input, reloc_shifts, params.relocation_count, offset);
    offset += SliceVectors(input, syncpt_increments, params.syncpoint_count, offset);
-    offset += SliceVectors(input, wait_checks, params.syncpoint_count, offset);
-    offset += SliceVectors(input, fences, params.fence_count, offset);
+    offset += SliceVectors(input, fence_thresholds, params.fence_count, offset);

    auto& gpu = system.GPU();
    if (gpu.UseNvdec()) {
        for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
            const SyncptIncr& syncpt_incr = syncpt_increments[i];
-            fences[i].id = syncpt_incr.id;
-            fences[i].value =
+            fence_thresholds[i] =
                syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments);
        }
    }
@@ -98,11 +95,6 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
                                  cmdlist.size() * sizeof(u32));
        gpu.PushCommandBuffer(cmdlist);
    }
-    if (gpu.UseNvdec()) {
-        fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1);
-        Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}};
-        gpu.PushCommandBuffer(cmdlist);
-    }
    std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
    // Some games expect command_buffers to be written back
    offset = sizeof(IoctlSubmit);
@@ -110,8 +102,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
    offset += WriteVectors(output, relocs, offset);
    offset += WriteVectors(output, reloc_shifts, offset);
    offset += WriteVectors(output, syncpt_increments, offset);
-    offset += WriteVectors(output, wait_checks, offset);
-    offset += WriteVectors(output, fences, offset);
+    offset += WriteVectors(output, fence_thresholds, offset);

    return NvResult::Success;
 }
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -56,19 +56,16 @@ protected:
        s32 target{};
        s32 target_offset{};
    };
-    static_assert(sizeof(Reloc) == 0x10, "CommandBuffer has incorrect size");
+    static_assert(sizeof(Reloc) == 0x10, "Reloc has incorrect size");

    struct SyncptIncr {
        u32 id{};
        u32 increments{};
+        u32 unk0{};
+        u32 unk1{};
+        u32 unk2{};
    };
-    static_assert(sizeof(SyncptIncr) == 0x8, "CommandBuffer has incorrect size");
-
-    struct Fence {
-        u32 id{};
-        u32 value{};
-    };
-    static_assert(sizeof(Fence) == 0x8, "CommandBuffer has incorrect size");
+    static_assert(sizeof(SyncptIncr) == 0x14, "SyncptIncr has incorrect size");

    struct IoctlGetSyncpoint {
        // Input
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -492,7 +492,7 @@ void TexturePass(Environment& env, IR::Program& program) {
            const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)};
            IR::IREmitter ir{*texture_inst.block, insert_point};
            const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))};
-            inst->SetArg(0, ir.SMin(ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift),
+            inst->SetArg(0, ir.UMin(ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift),
                                    ir.Imm32(DESCRIPTOR_SIZE - 1)));
        } else {
            inst->SetArg(0, IR::Value{});
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -38,6 +38,9 @@ enum : u8 {

    Shaders,

+    // Special entries
+    DepthBiasGlobal,
+
    LastCommonEntry,
 };

--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -181,9 +181,11 @@ Device::Device() {
        LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
        shader_backend = Settings::ShaderBackend::GLSL;
    }
+    // Completely disable async shaders for now, as they cause graphical glitches
+    use_asynchronous_shaders = false;
    // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
-    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
-                               !(is_amd || (is_intel && !is_linux));
+    // use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
+    //                            !(is_amd || (is_intel && !is_linux));
    use_driver_cache = is_nvidia;

    LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
@@ -268,4 +270,4 @@ void main() {
 })");
 }

-} // namespace OpenGL
+} // namespace OpenGL
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -272,7 +272,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
      device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
      update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
      buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
-      use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
+      use_asynchronous_shaders{false},
      workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
      serialization_thread(1, "yuzu:PipelineSerialization") {
    const auto& float_control{device.FloatControlProperties()};
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -627,9 +627,21 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthBias()) {
        return;
    }
-    scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp,
+    float units = regs.polygon_offset_units / 2.0f;
+    const bool is_d24 = regs.zeta.format == Tegra::DepthFormat::S8_UINT_Z24_UNORM ||
+                        regs.zeta.format == Tegra::DepthFormat::D24X8_UNORM ||
+                        regs.zeta.format == Tegra::DepthFormat::D24S8_UNORM ||
+                        regs.zeta.format == Tegra::DepthFormat::D24C8_UNORM;
+    if (is_d24 && !device.SupportsD24DepthBuffer()) {
+        // the base formulas can be obtained from here:
+        //   https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage-depth-bias
+        const double rescale_factor =
+            static_cast<double>(1ULL << (32 - 24)) / (static_cast<double>(0x1.ep+127));
+        units = static_cast<float>(static_cast<double>(units) * rescale_factor);
+    }
+    scheduler.Record([constant = units, clamp = regs.polygon_offset_clamp,
                      factor = regs.polygon_offset_factor](vk::CommandBuffer cmdbuf) {
-        cmdbuf.SetDepthBias(constant, clamp, factor / 2.0f);
+        cmdbuf.SetDepthBias(constant, clamp, factor);
    });
 }

--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -54,6 +54,7 @@ void SetupDirtyViewports(Tables& tables) {
    FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports);
    FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports);
    tables[0][OFF(viewport_transform_enabled)] = Viewports;
+    tables[1][OFF(screen_y_control)] = Viewports;
 }

 void SetupDirtyScissors(Tables& tables) {
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -79,7 +79,8 @@ public:
    }

    bool TouchDepthBias() {
-        return Exchange(Dirty::DepthBias, false);
+        return Exchange(Dirty::DepthBias, false) ||
+               Exchange(VideoCommon::Dirty::DepthBiasGlobal, false);
    }

    bool TouchBlendConstants() {
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -221,6 +221,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
        BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
    }
    const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
+
    PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));

    for (size_t index = 0; index < NUM_RT; ++index) {
@@ -230,6 +231,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
        maxwell3d.regs.render_area.width,
        maxwell3d.regs.render_area.height,
    };
+
+    flags[Dirty::DepthBiasGlobal] = true;
 }

 template <class P>
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -623,6 +623,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
        is_float16_supported = false;
    }

+    supports_d24_depth =
+        IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
+                          VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal);
+
    graphics_queue = logical.GetQueue(graphics_family);
    present_queue = logical.GetQueue(present_family);
 }
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -332,6 +332,10 @@ public:
        return sets_per_pool;
    }

+    bool SupportsD24DepthBuffer() const {
+        return supports_d24_depth;
+    }
+
 private:
    /// Checks if the physical device is suitable.
    void CheckSuitability(bool requires_swapchain) const;
@@ -425,6 +429,7 @@ private:
    bool has_broken_cube_compatibility{};   ///< Has broken cube compatiblity bit
    bool has_renderdoc{};                   ///< Has RenderDoc attached
    bool has_nsight_graphics{};             ///< Has Nsight Graphics attached
+    bool supports_d24_depth{};              ///< Supports D24 depth buffers.

    // Telemetry parameters
    std::string vendor_name;                       ///< Device's driver name.
Author	SHA1	Message	Date
Darkywolf999	412678c879	Merge branch 'yuzu-emu:master' into master	2021-10-25 11:18:18 +02:00
Ameer J	3c8c17be4d	Merge pull request #7218 from bylaws/aswdqdsam Fixup channel submit IOCTL syncpoint parameters	2021-10-24 19:35:00 -04:00
Ameer J	aed5878dd3	Merge pull request #7222 from FernandoS27/fix-indixed-textures-again TexturePass: Fix clamping of images as this allowed negative indices.	2021-10-24 15:16:31 -04:00
Fernando Sahmkow	e5291e2031	TexturePass: Fix clamping of images as this allowed negative indices.	2021-10-24 20:46:36 +02:00
Mai M	3be87bed8d	Merge pull request #7221 from astrelsky/stepfix Fixed ARM_Dynamic_64 Step	2021-10-24 14:43:40 -04:00
Andrew Strelsky	31b9797296	Fixed ARM_Dynamic_64 Step	2021-10-24 11:16:32 -04:00
Darkywolf999	58782739e5	Merge branch 'yuzu-emu:master' into master	2021-10-24 09:11:12 +02:00
Fernando S	5299554bb0	Merge pull request #7206 from vonchenplus/fix_vulkan_viewport_issue Vulkan Rasterizer: Fix viewport issue	2021-10-24 01:53:04 +02:00
Billy Laws	2dbef58eeb	Fixup channel submit IOCTL syncpoint parameters The current arguments worked by happenstance as games only ever submit one syncpoint and request one fence back, if a game were to do something other than this then the arguments would've been parsed entirely wrong.	2021-10-24 00:01:35 +01:00
Ameer J	494e34af6a	Merge pull request #7070 from FernandoS27/want-you-bad Vulkan Rasterizer: Correct DepthBias/PolygonOffset on Vulkan.	2021-10-23 18:02:23 -04:00
Fernando Sahmkow	ad8afaf1ef	Vulran Rasterizer: address feedback.	2021-10-23 23:46:29 +02:00
Narr the Reg	2686bf6734	Merge pull request #7217 from yuzu-emu/revert-6515-gc_thread_safe Revert "input_common: Fix data race on GC implementation"	2021-10-23 16:41:09 -05:00
Fernando S	33e92c15eb	Revert "input_common: Fix data race on GC implementation"	2021-10-23 23:32:16 +02:00
Fernando S	7461196839	Merge pull request #6515 from german77/gc_thread_safe input_common: Fix data race on GC implementation	2021-10-23 23:28:22 +02:00
Feng Chen	b769bea61b	Fix vulkan viewport issue	2021-10-22 22:56:31 +08:00
Darkywolf999	26cf341d75	Merge branch 'yuzu-emu:master' into master	2021-10-21 10:37:31 +02:00
bunnei	b65314dc21	Merge pull request #7207 from ameerj/vs-2022 cmake: Update conan to support Visual Studio 2022	2021-10-20 15:17:50 -07:00
Darkywolf999	ef545fada5	Merge branch 'yuzu-emu:master' into master	2021-10-18 13:44:49 +02:00
Darkywolf999	61aa5b13fb	Merge branch 'master' of https://github.com/Darkywolf999/yuzu	2021-10-16 08:12:39 +02:00
Darkywolf999	b113372693	Revert "Revert "renderers: Disable async shader compilation"" This reverts commit `3c6d440015`.	2021-10-16 08:12:27 +02:00
Darkywolf999	62da8e3585	Revert "Revert "renderers: Disable async shader compilation"" This reverts commit `3c6d440015`.	2021-10-16 07:59:49 +02:00
Darkywolf999	b48b4b503f	Merge branch 'yuzu-emu:master' into master	2021-10-16 07:56:58 +02:00
Darkywolf999	d8b1757cf9	Merge pull request #5 from Darkywolf999/Darkywolf999-patch-5 Update gl_device.cpp	2021-10-15 11:02:14 +02:00
Darkywolf999	8b433f1ff1	Update gl_device.cpp fix for vertex explosions in diablo 2. disabled GL_EXT_texture_shadow_lod & GLAD_GL_NV_vertex_buffer_unified_memory cannot find anything on opengl that this two are still in use also not on google.	2021-10-15 11:02:04 +02:00
Darkywolf999	9fdc9177b2	Merge pull request #4 from Darkywolf999/Darkywolf999-patch-4 Update gl_device.cpp	2021-10-14 22:00:58 +02:00
Darkywolf999	a5818a28a3	Update gl_device.cpp revert everything only the memory stays for now to fix the screen render problem with this game. will make other pr's for different problems.	2021-10-14 22:00:48 +02:00
Darkywolf999	e9bc4b71dc	Merge pull request #3 from Darkywolf999/Darkywolf999-patch-3 Update gl_device.cpp	2021-10-14 18:42:26 +02:00
Darkywolf999	e54b0ff7a5	Update gl_device.cpp	2021-10-14 18:42:01 +02:00
Darkywolf999	ba5df1a663	Merge pull request #2 from Darkywolf999/Darkywolf999-patch-2 Update gl_device.cpp	2021-10-14 18:32:20 +02:00
Darkywolf999	f2a5511224	Update gl_device.cpp	2021-10-14 18:32:04 +02:00
Darkywolf999	bf7310e336	Merge pull request #1 from Darkywolf999/Darkywolf999-patch-1 Update gl_device.cpp	2021-10-14 18:28:19 +02:00
Darkywolf999	34ba75b98c	Update gl_device.cpp	2021-10-14 18:27:22 +02:00
Darkywolf999	6e9b16426a	Fix for Snack world and proberaly some other games that where rendered upside down or not correctly Fix ? for blocking async on windows with intel and amd drivers for opengl Maybe something is odd as this is my first contribution but please let me know if i did something not correctly.	2021-10-14 11:10:23 +02:00
Fernando Sahmkow	60a3980561	Vulkan Rasterizer: Correct DepthBias/PolygonOffset on Vulkan.	2021-09-23 03:49:10 +02:00
Rodrigo Locatti	d4cab35533	input_common: Fix data race on GC implementation	2021-08-07 16:46:26 -05:00