Compare commits

..

18 Commits

Author SHA1 Message Date
Markus Wick
c76ffa5019 core/kernel: Fix GetTotalPhysicalMemoryUsed.
module._memory was already moved over to a new shared_ptr.
So code_memory_size was not increased at all.

This lowers the heap space and so saves a bit of memory, usually between 50 to 100 MB.

This fixes a regression of c0a01f3adc
2020-01-11 14:04:44 +01:00
Fernando Sahmkow
80436c1330 Merge pull request #3279 from ReinUsesLisp/vk-pipeline-cache
vk_pipeline_cache: Initial implementation
2020-01-08 17:31:20 -04:00
bunnei
319c4d2108 Merge pull request #3272 from bunnei/vi-close-layer
service: vi: Implement CloseLayer.
2020-01-07 12:45:34 -05:00
ReinUsesLisp
6888d776ff vk_pipeline_cache: Initial implementation
Given a pipeline key, this cache returns a pipeline abstraction (for
graphics or compute).
2020-01-06 22:02:26 -03:00
ReinUsesLisp
2effdeb924 vk_graphics_pipeline: Initial implementation
This abstractio represents the state of the 3D engine at a given draw.
Instead of changing individual bits of the pipeline how it's done in
APIs like D3D11, OpenGL and NVN; on Vulkan we are forced to put
everything together into a single, immutable object.

It takes advantage of the few dynamic states Vulkan offers.
2020-01-06 22:02:26 -03:00
ReinUsesLisp
dc96a59fa0 vk_compute_pipeline: Initial implementation
This abstraction represents a Vulkan compute pipeline.
2020-01-06 22:02:26 -03:00
ReinUsesLisp
b392a5986e vk_pipeline_cache: Add file and define descriptor update template filler
This function allows us to share code between compute and graphics
pipelines compilation.
2020-01-06 22:02:26 -03:00
ReinUsesLisp
3142f1b597 fixed_pipeline_state: Add depth clamp 2020-01-06 22:02:26 -03:00
ReinUsesLisp
9c548146ca vk_rasterizer: Add placeholder 2020-01-06 22:02:26 -03:00
bunnei
5be00cba15 Merge pull request #3276 from ReinUsesLisp/pipeline-reqs
vk_update_descriptor/vk_renderpass_cache: Add pipeline cache dependencies
2020-01-06 17:03:34 -05:00
bunnei
ee9b4a7f9a Merge pull request #3278 from ReinUsesLisp/vk-memory-manager
renderer_vulkan: Buffer cache, stream buffer and memory manager changes
2020-01-06 17:03:04 -05:00
ReinUsesLisp
5aeff9aff5 vk_renderpass_cache: Initial implementation
The renderpass cache is used to avoid creating renderpasses on each
draw. The hashed structure is not currently optimized.
2020-01-06 18:28:32 -03:00
ReinUsesLisp
322d6a0311 vk_update_descriptor: Initial implementation
The update descriptor is used to store in flat memory a large chunk of
staging data used to update descriptor sets through templates. It
provides a push interface to easily insert descriptors following the
current pipeline. The order used in the descriptor update template has
to be implicitly followed. We can catch bugs here using validation
layers.
2020-01-06 18:28:32 -03:00
bunnei
984563b773 Merge pull request #3277 from ReinUsesLisp/make-current
yuzu/bootmanager: Remove {glx,wgl}MakeCurrent on SwapBuffers
2020-01-06 14:09:19 -05:00
ReinUsesLisp
8306703a7d yuzu/bootmanager: Remove {glx,wgl}MakeCurrent on SwapBuffers
MakeCurrent is a costly (according to Nsight's profiler it takes a tenth
of a millisecond to complete), and we don't have a reason to call it
because:
- Qt no longer signals a warning if it's not called
- yuzu no longer supports macOS
2020-01-06 14:02:47 -03:00
bunnei
09908207fb Merge pull request #3261 from degasus/page_table
core/memory + arm/dynarmic: Use a global offset within our arm page table.
2020-01-06 11:56:59 -05:00
bunnei
64c5631579 service: vi: Implement CloseLayer.
- Needed for Undertale.
2020-01-04 00:45:06 -05:00
Markus Wick
0986caa8d8 core/memory + arm/dynarmic: Use a global offset within our arm page table.
This saves us two x64 instructions per load/store instruction.

TODO: Clean up our memory code. We can use this optimization here as well.
2020-01-01 12:24:54 +01:00
24 changed files with 1587 additions and 43 deletions

View File

@@ -141,6 +141,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag
config.page_table = reinterpret_cast<void**>(page_table.pointers.data());
config.page_table_address_space_bits = address_space_bits;
config.silently_mirror_page_table = false;
config.absolute_offset_page_table = true;
// Multi-process state
config.processor_id = core_index;

View File

@@ -317,6 +317,8 @@ void Process::FreeTLSRegion(VAddr tls_address) {
}
void Process::LoadModule(CodeSet module_, VAddr base_addr) {
code_memory_size += module_.memory.size();
const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));
const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
@@ -332,8 +334,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code);
MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
code_memory_size += module_.memory.size();
}
Process::Process(Core::System& system)

View File

@@ -88,6 +88,12 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
return layer_id;
}
void NVFlinger::CloseLayer(u64 layer_id) {
for (auto& display : displays) {
display.CloseLayer(layer_id);
}
}
std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
const auto* const layer = FindLayer(display_id, layer_id);

View File

@@ -54,6 +54,9 @@ public:
/// If an invalid display ID is specified, then an empty optional is returned.
std::optional<u64> CreateLayer(u64 display_id);
/// Closes a layer on all displays for the given layer ID.
void CloseLayer(u64 layer_id);
/// Finds the buffer queue ID of the specified layer in the specified display.
///
/// If an invalid display ID or layer ID is provided, then an empty optional is returned.

View File

@@ -24,11 +24,11 @@ Display::Display(u64 id, std::string name, Core::System& system) : id{id}, name{
Display::~Display() = default;
Layer& Display::GetLayer(std::size_t index) {
return layers.at(index);
return *layers.at(index);
}
const Layer& Display::GetLayer(std::size_t index) const {
return layers.at(index);
return *layers.at(index);
}
std::shared_ptr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
@@ -43,29 +43,38 @@ void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
// TODO(Subv): Support more than 1 layer.
ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
layers.emplace_back(id, buffer_queue);
layers.emplace_back(std::make_shared<Layer>(id, buffer_queue));
}
void Display::CloseLayer(u64 id) {
layers.erase(
std::remove_if(layers.begin(), layers.end(),
[id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; }),
layers.end());
}
Layer* Display::FindLayer(u64 id) {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
const auto itr =
std::find_if(layers.begin(), layers.end(),
[id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
return itr->get();
}
const Layer* Display::FindLayer(u64 id) const {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
const auto itr =
std::find_if(layers.begin(), layers.end(),
[id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
return itr->get();
}
} // namespace Service::VI

View File

@@ -4,6 +4,7 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
@@ -69,6 +70,12 @@ public:
///
void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
/// Closes and removes a layer from this display with the given ID.
///
/// @param id The ID assigned to the layer to close.
///
void CloseLayer(u64 id);
/// Attempts to find a layer with the given ID.
///
/// @param id The layer ID.
@@ -91,7 +98,7 @@ private:
u64 id;
std::string name;
std::vector<Layer> layers;
std::vector<std::shared_ptr<Layer>> layers;
Kernel::EventPair vsync_event;
};

View File

@@ -1066,6 +1066,18 @@ private:
rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
}
void CloseLayer(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto layer_id{rp.Pop<u64>()};
LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}", layer_id);
nv_flinger->CloseLayer(layer_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void CreateStrayLayer(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const u32 flags = rp.Pop<u32>();
@@ -1178,7 +1190,7 @@ IApplicationDisplayService::IApplicationDisplayService(
{1101, &IApplicationDisplayService::SetDisplayEnabled, "SetDisplayEnabled"},
{1102, &IApplicationDisplayService::GetDisplayResolution, "GetDisplayResolution"},
{2020, &IApplicationDisplayService::OpenLayer, "OpenLayer"},
{2021, nullptr, "CloseLayer"},
{2021, &IApplicationDisplayService::CloseLayer, "CloseLayer"},
{2030, &IApplicationDisplayService::CreateStrayLayer, "CreateStrayLayer"},
{2031, &IApplicationDisplayService::DestroyStrayLayer, "DestroyStrayLayer"},
{2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"},

View File

@@ -146,7 +146,7 @@ struct Memory::Impl {
u8* GetPointer(const VAddr vaddr) {
u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
if (page_pointer != nullptr) {
return page_pointer + (vaddr & PAGE_MASK);
return page_pointer + vaddr;
}
if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
@@ -229,7 +229,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* const src_ptr = page_table.pointers[page_index] + page_offset;
const u8* const src_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
@@ -276,7 +277,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* const dest_ptr = page_table.pointers[page_index] + page_offset;
u8* const dest_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
@@ -322,7 +324,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* dest_ptr = page_table.pointers[page_index] + page_offset;
u8* dest_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memset(dest_ptr, 0, copy_amount);
break;
}
@@ -368,7 +371,8 @@ struct Memory::Impl {
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* src_ptr = page_table.pointers[page_index] + page_offset;
const u8* src_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
WriteBlock(process, dest_addr, src_ptr, copy_amount);
break;
}
@@ -446,7 +450,8 @@ struct Memory::Impl {
page_type = Common::PageType::Unmapped;
} else {
page_type = Common::PageType::Memory;
current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
current_page_table->pointers[vaddr >> PAGE_BITS] =
pointer - (vaddr & ~PAGE_MASK);
}
break;
}
@@ -493,7 +498,9 @@ struct Memory::Impl {
memory);
} else {
while (base != end) {
page_table.pointers[base] = memory;
page_table.pointers[base] = memory - (base << PAGE_BITS);
ASSERT_MSG(page_table.pointers[base],
"memory mapping base yield a nullptr within the table");
base += 1;
memory += PAGE_SIZE;
@@ -518,7 +525,7 @@ struct Memory::Impl {
if (page_pointer != nullptr) {
// NOTE: Avoid adding any extra logic to this fast-path block
T value;
std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
std::memcpy(&value, &page_pointer[vaddr], sizeof(T));
return value;
}
@@ -559,7 +566,7 @@ struct Memory::Impl {
u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
if (page_pointer != nullptr) {
// NOTE: Avoid adding any extra logic to this fast-path block
std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
std::memcpy(&page_pointer[vaddr], &data, sizeof(T));
return;
}

View File

@@ -155,14 +155,23 @@ if (ENABLE_VULKAN)
renderer_vulkan/maxwell_to_vk.h
renderer_vulkan/vk_buffer_cache.cpp
renderer_vulkan/vk_buffer_cache.h
renderer_vulkan/vk_compute_pipeline.cpp
renderer_vulkan/vk_compute_pipeline.h
renderer_vulkan/vk_descriptor_pool.cpp
renderer_vulkan/vk_descriptor_pool.h
renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h
renderer_vulkan/vk_graphics_pipeline.cpp
renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_image.cpp
renderer_vulkan/vk_image.h
renderer_vulkan/vk_memory_manager.cpp
renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_pipeline_cache.cpp
renderer_vulkan/vk_pipeline_cache.h
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_renderpass_cache.cpp
renderer_vulkan/vk_renderpass_cache.h
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h
renderer_vulkan/vk_sampler_cache.cpp
@@ -176,7 +185,9 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp
renderer_vulkan/vk_swapchain.h)
renderer_vulkan/vk_swapchain.h
renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)

View File

@@ -109,6 +109,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
const auto topology = static_cast<std::size_t>(regs.draw.topology.Value());
const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]];
const auto& clip = regs.view_volume_clip_control;
const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
Maxwell::Cull::FrontFace front_face = regs.cull.front_face;
if (regs.screen_y_control.triangle_rast_flip != 0 &&
regs.viewport_transform[0].scale_y > 0.0f) {
@@ -119,8 +122,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
}
const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, gl_ndc,
regs.cull.cull_face, front_face);
return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled,
depth_clamp_enabled, gl_ndc, regs.cull.cull_face,
front_face);
}
} // Anonymous namespace
@@ -222,15 +226,17 @@ bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const
std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept {
return static_cast<std::size_t>(cull_enable) ^
(static_cast<std::size_t>(depth_bias_enable) << 1) ^
(static_cast<std::size_t>(ndc_minus_one_to_one) << 2) ^
(static_cast<std::size_t>(depth_clamp_enable) << 2) ^
(static_cast<std::size_t>(ndc_minus_one_to_one) << 3) ^
(static_cast<std::size_t>(cull_face) << 24) ^
(static_cast<std::size_t>(front_face) << 48);
}
bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept {
return std::tie(cull_enable, depth_bias_enable, ndc_minus_one_to_one, cull_face, front_face) ==
std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.ndc_minus_one_to_one, rhs.cull_face,
rhs.front_face);
return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one,
cull_face, front_face) ==
std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable,
rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face);
}
std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept {

View File

@@ -170,15 +170,17 @@ struct FixedPipelineState {
};
struct Rasterizer {
constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool ndc_minus_one_to_one,
Maxwell::Cull::CullFace cull_face, Maxwell::Cull::FrontFace front_face)
constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable,
bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face,
Maxwell::Cull::FrontFace front_face)
: cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
ndc_minus_one_to_one{ndc_minus_one_to_one}, cull_face{cull_face}, front_face{
front_face} {}
depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one},
cull_face{cull_face}, front_face{front_face} {}
Rasterizer() = default;
bool cull_enable;
bool depth_bias_enable;
bool depth_clamp_enable;
bool ndc_minus_one_to_one;
Maxwell::Cull::CullFace cull_face;
Maxwell::Cull::FrontFace front_face;

View File

@@ -0,0 +1,112 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Vulkan {
VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
const SPIRVShader& shader)
: device{device}, scheduler{scheduler}, entries{shader.entries},
descriptor_set_layout{CreateDescriptorSetLayout()},
descriptor_allocator{descriptor_pool, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate()},
shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {}
VKComputePipeline::~VKComputePipeline() = default;
vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
const auto set = descriptor_allocator.Commit(scheduler.GetFence());
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
std::vector<vk::DescriptorSetLayoutBinding> bindings;
u32 binding = 0;
const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
// TODO(Rodrigo): Maybe make individual bindings here?
for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
bindings.emplace_back(binding++, descriptor_type, 1, vk::ShaderStageFlagBits::eCompute,
nullptr);
}
};
AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
{}, static_cast<u32>(bindings.size()), bindings.data());
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
}
UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const {
const vk::PipelineLayoutCreateInfo layout_ci({}, 1, &*descriptor_set_layout, 0, nullptr);
const auto dev = device.GetLogical();
return dev.createPipelineLayoutUnique(layout_ci, nullptr, device.GetDispatchLoader());
}
UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const {
std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
u32 binding = 0;
u32 offset = 0;
FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries);
if (template_entries.empty()) {
// If the shader doesn't use descriptor sets, skip template creation.
return UniqueDescriptorUpdateTemplate{};
}
const vk::DescriptorUpdateTemplateCreateInfo template_ci(
{}, static_cast<u32>(template_entries.size()), template_entries.data(),
vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
}
UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
const vk::ShaderModuleCreateInfo module_ci({}, code.size() * sizeof(u32), code.data());
const auto dev = device.GetLogical();
return dev.createShaderModuleUnique(module_ci, nullptr, device.GetDispatchLoader());
}
UniquePipeline VKComputePipeline::CreatePipeline() const {
vk::PipelineShaderStageCreateInfo shader_stage_ci({}, vk::ShaderStageFlagBits::eCompute,
*shader_module, "main", nullptr);
vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) {
shader_stage_ci.pNext = &subgroup_size_ci;
}
const vk::ComputePipelineCreateInfo create_info({}, shader_stage_ci, *layout, {}, 0);
const auto dev = device.GetLogical();
return dev.createComputePipelineUnique({}, create_info, nullptr, device.GetDispatchLoader());
}
} // namespace Vulkan

View File

@@ -0,0 +1,66 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
class VKUpdateDescriptorQueue;
class VKComputePipeline final {
public:
explicit VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
const SPIRVShader& shader);
~VKComputePipeline();
vk::DescriptorSet CommitDescriptorSet();
vk::Pipeline GetHandle() const {
return *pipeline;
}
vk::PipelineLayout GetLayout() const {
return *layout;
}
const ShaderEntries& GetEntries() {
return entries;
}
private:
UniqueDescriptorSetLayout CreateDescriptorSetLayout() const;
UniquePipelineLayout CreatePipelineLayout() const;
UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const;
UniqueShaderModule CreateShaderModule(const std::vector<u32>& code) const;
UniquePipeline CreatePipeline() const;
const VKDevice& device;
VKScheduler& scheduler;
ShaderEntries entries;
UniqueDescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
VKUpdateDescriptorQueue& update_descriptor_queue;
UniquePipelineLayout layout;
UniqueDescriptorUpdateTemplate descriptor_template;
UniqueShaderModule shader_module;
UniquePipeline pipeline;
};
} // namespace Vulkan

View File

@@ -0,0 +1,271 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
namespace {
vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) {
return vk::StencilOpState(MaxwellToVK::StencilOp(face.action_stencil_fail),
MaxwellToVK::StencilOp(face.action_depth_pass),
MaxwellToVK::StencilOp(face.action_depth_fail),
MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0);
}
bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) {
static constexpr std::array unsupported_topologies = {
vk::PrimitiveTopology::ePointList,
vk::PrimitiveTopology::eLineList,
vk::PrimitiveTopology::eTriangleList,
vk::PrimitiveTopology::eLineListWithAdjacency,
vk::PrimitiveTopology::eTriangleListWithAdjacency,
vk::PrimitiveTopology::ePatchList};
return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies),
topology) == std::end(unsupported_topologies);
}
} // Anonymous namespace
VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache,
const GraphicsPipelineCacheKey& key,
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
const SPIRVProgram& program)
: device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
descriptor_allocator{descriptor_pool, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
program)},
renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
key.renderpass_params,
program)} {}
VKGraphicsPipeline::~VKGraphicsPipeline() = default;
vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
const auto set = descriptor_allocator.Commit(scheduler.GetFence());
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const {
const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
{}, static_cast<u32>(bindings.size()), bindings.data());
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
}
UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, 0,
nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
}
UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
const SPIRVProgram& program) const {
std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
u32 binding = 0;
u32 offset = 0;
for (const auto& stage : program) {
if (stage) {
FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset,
template_entries);
}
}
if (template_entries.empty()) {
// If the shader doesn't use descriptor sets, skip template creation.
return UniqueDescriptorUpdateTemplate{};
}
const vk::DescriptorUpdateTemplateCreateInfo template_ci(
{}, static_cast<u32>(template_entries.size()), template_entries.data(),
vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
}
std::vector<UniqueShaderModule> VKGraphicsPipeline::CreateShaderModules(
const SPIRVProgram& program) const {
std::vector<UniqueShaderModule> modules;
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
const auto& stage = program[i];
if (!stage) {
continue;
}
const vk::ShaderModuleCreateInfo module_ci({}, stage->code.size() * sizeof(u32),
stage->code.data());
modules.emplace_back(dev.createShaderModuleUnique(module_ci, nullptr, dld));
}
return modules;
}
UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const {
const auto& vi = fixed_state.vertex_input;
const auto& ia = fixed_state.input_assembly;
const auto& ds = fixed_state.depth_stencil;
const auto& cd = fixed_state.color_blending;
const auto& ts = fixed_state.tessellation;
const auto& rs = fixed_state.rasterizer;
std::vector<vk::VertexInputBindingDescription> vertex_bindings;
std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
for (std::size_t i = 0; i < vi.num_bindings; ++i) {
const auto& binding = vi.bindings[i];
const bool instanced = binding.divisor != 0;
const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex;
vertex_bindings.emplace_back(binding.index, binding.stride, rate);
if (instanced) {
vertex_binding_divisors.emplace_back(binding.index, binding.divisor);
}
}
std::vector<vk::VertexInputAttributeDescription> vertex_attributes;
const auto& input_attributes = program[0]->entries.attributes;
for (std::size_t i = 0; i < vi.num_attributes; ++i) {
const auto& attribute = vi.attributes[i];
if (input_attributes.find(attribute.index) == input_attributes.end()) {
// Skip attributes not used by the vertex shaders.
continue;
}
vertex_attributes.emplace_back(attribute.index, attribute.buffer,
MaxwellToVK::VertexFormat(attribute.type, attribute.size),
attribute.offset);
}
vk::PipelineVertexInputStateCreateInfo vertex_input_ci(
{}, static_cast<u32>(vertex_bindings.size()), vertex_bindings.data(),
static_cast<u32>(vertex_attributes.size()), vertex_attributes.data());
const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci(
static_cast<u32>(vertex_binding_divisors.size()), vertex_binding_divisors.data());
if (!vertex_binding_divisors.empty()) {
vertex_input_ci.pNext = &vertex_input_divisor_ci;
}
const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology);
const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci(
{}, primitive_topology,
ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology));
const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points);
const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr,
Maxwell::NumViewports, nullptr);
// TODO(Rodrigo): Find out what's the default register value for front face
const vk::PipelineRasterizationStateCreateInfo rasterizer_ci(
{}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill,
rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone,
rs.cull_enable ? MaxwellToVK::FrontFace(rs.front_face) : vk::FrontFace::eCounterClockwise,
rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f);
const vk::PipelineMultisampleStateCreateInfo multisampling_ci(
{}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false);
const vk::CompareOp depth_test_compare = ds.depth_test_enable
? MaxwellToVK::ComparisonOp(ds.depth_test_function)
: vk::CompareOp::eAlways;
const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci(
{}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable,
ds.stencil_enable, GetStencilFaceState(ds.front_stencil),
GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f);
std::array<vk::PipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const std::size_t num_attachments =
std::min(cd.attachments_count, renderpass_params.color_attachments.size());
for (std::size_t i = 0; i < num_attachments; ++i) {
constexpr std::array component_table{
vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG,
vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA};
const auto& blend = cd.attachments[i];
vk::ColorComponentFlags color_components{};
for (std::size_t j = 0; j < component_table.size(); ++j) {
if (blend.components[j])
color_components |= component_table[j];
}
cb_attachments[i] = vk::PipelineColorBlendAttachmentState(
blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func),
MaxwellToVK::BlendFactor(blend.dst_rgb_func),
MaxwellToVK::BlendEquation(blend.rgb_equation),
MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func),
MaxwellToVK::BlendEquation(blend.a_equation), color_components);
}
const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy,
static_cast<u32>(num_attachments),
cb_attachments.data(), {});
constexpr std::array dynamic_states = {
vk::DynamicState::eViewport, vk::DynamicState::eScissor,
vk::DynamicState::eDepthBias, vk::DynamicState::eBlendConstants,
vk::DynamicState::eDepthBounds, vk::DynamicState::eStencilCompareMask,
vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference};
const vk::PipelineDynamicStateCreateInfo dynamic_state_ci(
{}, static_cast<u32>(dynamic_states.size()), dynamic_states.data());
vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
std::size_t module_index = 0;
for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
if (!program[stage]) {
continue;
}
const auto stage_enum = static_cast<Tegra::Engines::ShaderType>(stage);
const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum);
auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage,
*modules[module_index++], "main", nullptr);
if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) {
stage_ci.pNext = &subgroup_size_ci;
}
}
const vk::GraphicsPipelineCreateInfo create_info(
{}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input_ci,
&input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci,
&depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld);
}
} // namespace Vulkan

View File

@@ -0,0 +1,90 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
#include <optional>
#include <unordered_map>
#include <vector>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct GraphicsPipelineCacheKey;
class VKDescriptorPool;
class VKDevice;
class VKRenderPassCache;
class VKScheduler;
class VKUpdateDescriptorQueue;
using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;
class VKGraphicsPipeline final {
public:
explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache,
const GraphicsPipelineCacheKey& key,
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
const SPIRVProgram& program);
~VKGraphicsPipeline();
vk::DescriptorSet CommitDescriptorSet();
vk::Pipeline GetHandle() const {
return *pipeline;
}
vk::PipelineLayout GetLayout() const {
return *layout;
}
vk::RenderPass GetRenderPass() const {
return renderpass;
}
private:
UniqueDescriptorSetLayout CreateDescriptorSetLayout(
const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const;
UniquePipelineLayout CreatePipelineLayout() const;
UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate(
const SPIRVProgram& program) const;
std::vector<UniqueShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const;
const VKDevice& device;
VKScheduler& scheduler;
const FixedPipelineState fixed_state;
const u64 hash;
UniqueDescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
VKUpdateDescriptorQueue& update_descriptor_queue;
UniquePipelineLayout layout;
UniqueDescriptorUpdateTemplate descriptor_template;
std::vector<UniqueShaderModule> modules;
vk::RenderPass renderpass;
UniquePipeline pipeline;
};
} // namespace Vulkan

View File

@@ -0,0 +1,395 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstddef>
#include <memory>
#include <vector>
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader/compiler_settings.h"
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
using Tegra::Engines::ShaderType;
namespace {
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
VideoCommon::Shader::CompileDepth::FullDecompile};
/// Gets the address for the specified shader stage program
GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
const auto& gpu{system.GPU().Maxwell3D()};
const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
return gpu.regs.code_address.CodeAddress() + shader_config.offset;
}
/// Gets if the current instruction offset is a scheduler instruction
constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
// Sched instructions appear once every 4 instructions.
constexpr std::size_t SchedPeriod = 4;
const std::size_t absolute_offset = offset - main_offset;
return (absolute_offset % SchedPeriod) == 0;
}
/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
const std::size_t start_offset = is_compute ? 0 : 10;
// This is the encoded version of BRA that jumps to itself. All Nvidia
// shaders end with one.
constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
std::size_t offset = start_offset;
while (offset < program.size()) {
const u64 instruction = program[offset];
if (!IsSchedInstruction(offset, start_offset)) {
if ((instruction & mask) == self_jumping_branch) {
// End on Maxwell's "nop" instruction
break;
}
if (instruction == 0) {
break;
}
}
++offset;
}
// The last instruction is included in the program size
return std::min(offset + 1, program.size());
}
/// Gets the shader program code from memory for the specified address
ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
const u8* host_ptr, bool is_compute) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
ASSERT_OR_EXECUTE(host_ptr != nullptr, {
std::fill(program_code.begin(), program_code.end(), 0);
return program_code;
});
memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
program_code.size() * sizeof(u64));
program_code.resize(CalculateProgramSize(program_code, is_compute));
return program_code;
}
constexpr std::size_t GetStageFromProgram(std::size_t program) {
return program == 0 ? 0 : program - 1;
}
constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
}
ShaderType GetShaderType(Maxwell::ShaderProgram program) {
switch (program) {
case Maxwell::ShaderProgram::VertexB:
return ShaderType::Vertex;
case Maxwell::ShaderProgram::TesselationControl:
return ShaderType::TesselationControl;
case Maxwell::ShaderProgram::TesselationEval:
return ShaderType::TesselationEval;
case Maxwell::ShaderProgram::Geometry:
return ShaderType::Geometry;
case Maxwell::ShaderProgram::Fragment:
return ShaderType::Fragment;
default:
UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program));
return ShaderType::Vertex;
}
}
u32 FillDescriptorLayout(const ShaderEntries& entries,
std::vector<vk::DescriptorSetLayoutBinding>& bindings,
Maxwell::ShaderProgram program_type, u32 base_binding) {
const ShaderType stage = GetStageFromProgram(program_type);
const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage);
u32 binding = base_binding;
const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
for (std::size_t i = 0; i < num_entries; ++i) {
bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr);
}
};
AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
return binding;
}
} // Anonymous namespace
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
ProgramCode program_code, u32 main_offset)
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
shader_ir{this->program_code, main_offset, compiler_settings, locker},
entries{GenerateShaderEntries(shader_ir)} {}
CachedShader::~CachedShader() = default;
Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
Core::System& system, Tegra::Engines::ShaderType stage) {
if (stage == Tegra::Engines::ShaderType::Compute) {
return system.GPU().KeplerCompute();
} else {
return system.GPU().Maxwell3D();
}
}
VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue)
: RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
renderpass_cache(device) {}
VKPipelineCache::~VKPipelineCache() = default;
std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const auto& gpu = system.GPU().Maxwell3D();
auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
if (!dirty) {
return last_shaders;
}
dirty = false;
std::array<Shader, Maxwell::MaxShaderProgram> shaders;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
if (!gpu.regs.IsShaderConfigEnabled(index)) {
continue;
}
auto& memory_manager{system.GPU().MemoryManager()};
const GPUVAddr program_addr{GetShaderAddress(system, program)};
const auto host_ptr{memory_manager.GetPointer(program_addr)};
auto shader = TryGet(host_ptr);
if (!shader) {
// No shader found - create a new one
constexpr u32 stage_offset = 10;
const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
host_ptr, std::move(code), stage_offset);
Register(shader);
}
shaders[index] = std::move(shader);
}
return last_shaders = shaders;
}
VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
if (last_graphics_pipeline && last_graphics_key == key) {
return *last_graphics_pipeline;
}
last_graphics_key = key;
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
auto& entry = pair->second;
if (is_cache_miss) {
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key);
entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, renderpass_cache, key,
bindings, program);
}
return *(last_graphics_pipeline = entry.get());
}
VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
auto& entry = pair->second;
if (!is_cache_miss) {
return *entry;
}
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
auto& memory_manager = system.GPU().MemoryManager();
const auto program_addr = key.shader;
const auto host_ptr = memory_manager.GetPointer(program_addr);
auto shader = TryGet(host_ptr);
if (!shader) {
// No shader found - create a new one
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
constexpr u32 kernel_main_offset = 0;
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
program_addr, *cpu_addr, host_ptr, std::move(code),
kernel_main_offset);
Register(shader);
}
Specialization specialization;
specialization.workgroup_size = key.workgroup_size;
specialization.shared_memory_size = key.shared_memory_size;
const SPIRVShader spirv_shader{
Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
shader->GetEntries()};
entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, spirv_shader);
return *entry;
}
void VKPipelineCache::Unregister(const Shader& shader) {
bool finished = false;
const auto Finish = [&] {
// TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
// flush.
if (finished) {
return;
}
finished = true;
scheduler.Finish();
};
const GPUVAddr invalidated_addr = shader->GetGpuAddr();
for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
auto& entry = it->first;
if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
entry.shaders.end()) {
++it;
continue;
}
Finish();
it = graphics_cache.erase(it);
}
for (auto it = compute_cache.begin(); it != compute_cache.end();) {
auto& entry = it->first;
if (entry.shader != invalidated_addr) {
++it;
continue;
}
Finish();
it = compute_cache.erase(it);
}
RasterizerCache::Unregister(shader);
}
std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>>
VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto& fixed_state = key.fixed_state;
auto& memory_manager = system.GPU().MemoryManager();
const auto& gpu = system.GPU().Maxwell3D();
Specialization specialization;
specialization.primitive_topology = fixed_state.input_assembly.topology;
if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
ASSERT(fixed_state.input_assembly.point_size != 0.0f);
specialization.point_size = fixed_state.input_assembly.point_size;
}
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
}
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
specialization.tessellation.primitive = fixed_state.tessellation.primitive;
specialization.tessellation.spacing = fixed_state.tessellation.spacing;
specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
for (const auto& rt : key.renderpass_params.color_attachments) {
specialization.enabled_rendertargets.set(rt.index);
}
SPIRVProgram program;
std::vector<vk::DescriptorSetLayoutBinding> bindings;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
// Skip stages that are not enabled
if (!gpu.regs.IsShaderConfigEnabled(index)) {
continue;
}
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
const auto shader = TryGet(host_ptr);
ASSERT(shader);
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
const auto program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
entries};
if (program_enum == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
++index;
}
const u32 old_binding = specialization.base_binding;
specialization.base_binding =
FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
}
return {std::move(program), std::move(bindings)};
}
void FillDescriptorUpdateTemplateEntries(
const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) {
const u32 count = static_cast<u32>(count_);
if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer &&
device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
// Nvidia has a bug where updating multiple uniform texels at once causes the driver to
// crash.
for (u32 i = 0; i < count; ++i) {
template_entries.emplace_back(binding + i, 0, 1, descriptor_type,
offset + i * entry_size, entry_size);
}
} else if (count != 0) {
template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
}
offset += count * entry_size;
binding += count;
};
AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
AddEntry(vk::DescriptorType::eStorageImage, entries.images.size());
}
} // namespace Vulkan

View File

@@ -0,0 +1,200 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <memory>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
#include <boost/functional/hash.hpp>
#include "common/common_types.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/surface.h"
namespace Core {
class System;
}
namespace Vulkan {
class RasterizerVulkan;
class VKComputePipeline;
class VKDescriptorPool;
class VKDevice;
class VKFence;
class VKScheduler;
class VKUpdateDescriptorQueue;
class CachedShader;
using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ProgramCode = std::vector<u64>;
struct GraphicsPipelineCacheKey {
FixedPipelineState fixed_state;
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
RenderPassParams renderpass_params;
std::size_t Hash() const noexcept {
std::size_t hash = fixed_state.Hash();
for (const auto& shader : shaders) {
boost::hash_combine(hash, shader);
}
boost::hash_combine(hash, renderpass_params.Hash());
return hash;
}
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
return std::tie(fixed_state, shaders, renderpass_params) ==
std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
}
};
struct ComputePipelineCacheKey {
GPUVAddr shader{};
u32 shared_memory_size{};
std::array<u32, 3> workgroup_size{};
std::size_t Hash() const noexcept {
return static_cast<std::size_t>(shader) ^
((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
static_cast<std::size_t>(workgroup_size[0]) ^
(static_cast<std::size_t>(workgroup_size[1]) << 16) ^
(static_cast<std::size_t>(workgroup_size[2]) << 24);
}
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
return std::tie(shader, shared_memory_size, workgroup_size) ==
std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
}
};
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::GraphicsPipelineCacheKey> {
std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::ComputePipelineCacheKey> {
std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
class CachedShader final : public RasterizerCacheObject {
public:
explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
~CachedShader();
GPUVAddr GetGpuAddr() const {
return gpu_addr;
}
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return program_code.size() * sizeof(u64);
}
VideoCommon::Shader::ShaderIR& GetIR() {
return shader_ir;
}
const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
const ShaderEntries& GetEntries() const {
return entries;
}
private:
static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
Tegra::Engines::ShaderType stage);
GPUVAddr gpu_addr{};
VAddr cpu_addr{};
ProgramCode program_code;
VideoCommon::Shader::ConstBufferLocker locker;
VideoCommon::Shader::ShaderIR shader_ir;
ShaderEntries entries;
};
class VKPipelineCache final : public RasterizerCache<Shader> {
public:
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue);
~VKPipelineCache();
std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
protected:
void Unregister(const Shader& shader) override;
void FlushObjectInner(const Shader& object) override {}
private:
std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders(
const GraphicsPipelineCacheKey& key);
Core::System& system;
const VKDevice& device;
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
VKRenderPassCache renderpass_cache;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
GraphicsPipelineCacheKey last_graphics_key;
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
graphics_cache;
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
};
void FillDescriptorUpdateTemplateEntries(
const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries);
} // namespace Vulkan

View File

@@ -0,0 +1,13 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/rasterizer_interface.h"
namespace Vulkan {
class RasterizerVulkan : public VideoCore::RasterizerInterface {};
} // namespace Vulkan

View File

@@ -0,0 +1,100 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
namespace Vulkan {
VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {}
VKRenderPassCache::~VKRenderPassCache() = default;
vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
const auto [pair, is_cache_miss] = cache.try_emplace(params);
auto& entry = pair->second;
if (is_cache_miss) {
entry = CreateRenderPass(params);
}
return *entry;
}
UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
std::vector<vk::AttachmentDescription> descriptors;
std::vector<vk::AttachmentReference> color_references;
for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) {
const auto attachment = params.color_attachments[rt];
const auto format =
MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format);
ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
static_cast<u32>(attachment.pixel_format));
// TODO(Rodrigo): Add eMayAlias when it's needed.
const auto color_layout = attachment.is_texception
? vk::ImageLayout::eGeneral
: vk::ImageLayout::eColorAttachmentOptimal;
descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format,
vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare,
vk::AttachmentStoreOp::eDontCare, color_layout, color_layout);
color_references.emplace_back(static_cast<u32>(rt), color_layout);
}
vk::AttachmentReference zeta_attachment_ref;
if (params.has_zeta) {
const auto format =
MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format);
ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
static_cast<u32>(params.zeta_pixel_format));
const auto zeta_layout = params.zeta_texception
? vk::ImageLayout::eGeneral
: vk::ImageLayout::eDepthStencilAttachmentOptimal;
descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format,
vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad,
vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout);
zeta_attachment_ref =
vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout);
}
const vk::SubpassDescription subpass_description(
{}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()),
color_references.data(), nullptr, params.has_zeta ? &zeta_attachment_ref : nullptr, 0,
nullptr);
vk::AccessFlags access;
vk::PipelineStageFlags stage;
if (!color_references.empty()) {
access |=
vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite;
stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
}
if (params.has_zeta) {
access |= vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentWrite;
stage |= vk::PipelineStageFlagBits::eLateFragmentTests;
}
const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access,
{});
const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()),
descriptors.data(), 1, &subpass_description, 1,
&subpass_dependency);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createRenderPassUnique(create_info, nullptr, dld);
}
} // namespace Vulkan

View File

@@ -0,0 +1,97 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <tuple>
#include <unordered_map>
#include <boost/container/static_vector.hpp>
#include <boost/functional/hash.hpp>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/surface.h"
namespace Vulkan {
class VKDevice;
// TODO(Rodrigo): Optimize this structure for faster hashing
struct RenderPassParams {
struct ColorAttachment {
u32 index = 0;
VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid;
bool is_texception = false;
std::size_t Hash() const noexcept {
return static_cast<std::size_t>(pixel_format) |
static_cast<std::size_t>(is_texception) << 6 |
static_cast<std::size_t>(index) << 7;
}
bool operator==(const ColorAttachment& rhs) const noexcept {
return std::tie(index, pixel_format, is_texception) ==
std::tie(rhs.index, rhs.pixel_format, rhs.is_texception);
}
};
boost::container::static_vector<ColorAttachment,
Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
color_attachments{};
// TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type.
VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid;
bool has_zeta = false;
bool zeta_texception = false;
std::size_t Hash() const noexcept {
std::size_t hash = 0;
for (const auto& rt : color_attachments) {
boost::hash_combine(hash, rt.Hash());
}
boost::hash_combine(hash, zeta_pixel_format);
boost::hash_combine(hash, has_zeta);
boost::hash_combine(hash, zeta_texception);
return hash;
}
bool operator==(const RenderPassParams& rhs) const {
return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) ==
std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta,
rhs.zeta_texception);
}
};
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::RenderPassParams> {
std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
class VKRenderPassCache final {
public:
explicit VKRenderPassCache(const VKDevice& device);
~VKRenderPassCache();
vk::RenderPass GetRenderPass(const RenderPassParams& params);
private:
UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const;
const VKDevice& device;
std::unordered_map<RenderPassParams, UniqueRenderPass> cache;
};
} // namespace Vulkan

View File

@@ -0,0 +1,57 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <variant>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Vulkan {
VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler)
: device{device}, scheduler{scheduler} {}
VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
void VKUpdateDescriptorQueue::TickFrame() {
payload.clear();
}
void VKUpdateDescriptorQueue::Acquire() {
entries.clear();
}
void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template,
vk::DescriptorSet set) {
if (payload.size() + entries.size() >= payload.max_size()) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
payload.clear();
}
const auto payload_start = payload.data() + payload.size();
for (const auto& entry : entries) {
if (const auto image = std::get_if<vk::DescriptorImageInfo>(&entry)) {
payload.push_back(*image);
} else if (const auto buffer = std::get_if<Buffer>(&entry)) {
payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size);
} else if (const auto texel = std::get_if<vk::BufferView>(&entry)) {
payload.push_back(*texel);
} else {
UNREACHABLE();
}
}
scheduler.Record([dev = device.GetLogical(), payload_start, set,
update_template]([[maybe_unused]] auto cmdbuf, auto& dld) {
dev.updateDescriptorSetWithTemplate(set, update_template, payload_start, dld);
});
}
} // namespace Vulkan

View File

@@ -0,0 +1,86 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <type_traits>
#include <variant>
#include <boost/container/static_vector.hpp>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
class DescriptorUpdateEntry {
public:
explicit DescriptorUpdateEntry() : image{} {}
DescriptorUpdateEntry(vk::DescriptorImageInfo image) : image{image} {}
DescriptorUpdateEntry(vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size)
: buffer{buffer, offset, size} {}
DescriptorUpdateEntry(vk::BufferView texel_buffer) : texel_buffer{texel_buffer} {}
private:
union {
vk::DescriptorImageInfo image;
vk::DescriptorBufferInfo buffer;
vk::BufferView texel_buffer;
};
};
class VKUpdateDescriptorQueue final {
public:
explicit VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler);
~VKUpdateDescriptorQueue();
void TickFrame();
void Acquire();
void Send(vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set);
void AddSampledImage(vk::Sampler sampler, vk::ImageView image_view) {
entries.emplace_back(vk::DescriptorImageInfo{sampler, image_view, {}});
}
void AddImage(vk::ImageView image_view) {
entries.emplace_back(vk::DescriptorImageInfo{{}, image_view, {}});
}
void AddBuffer(const vk::Buffer* buffer, u64 offset, std::size_t size) {
entries.push_back(Buffer{buffer, offset, size});
}
void AddTexelBuffer(vk::BufferView texel_buffer) {
entries.emplace_back(texel_buffer);
}
vk::ImageLayout* GetLastImageLayout() {
return &std::get<vk::DescriptorImageInfo>(entries.back()).imageLayout;
}
private:
struct Buffer {
const vk::Buffer* buffer{};
u64 offset{};
std::size_t size{};
};
using Variant = std::variant<vk::DescriptorImageInfo, Buffer, vk::BufferView>;
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Variant>);
const VKDevice& device;
VKScheduler& scheduler;
boost::container::static_vector<Variant, 0x400> entries;
boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
};
} // namespace Vulkan

View File

@@ -215,18 +215,11 @@ void GRenderWindow::moveContext() {
}
void GRenderWindow::SwapBuffers() {
// In our multi-threaded QWidget use case we shouldn't need to call `makeCurrent`,
// since we never call `doneCurrent` in this thread.
// However:
// - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called
// since the last time `swapBuffers` was executed;
// - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks.
context->makeCurrent(child);
context->swapBuffers(child);
if (!first_frame) {
emit FirstFrameDisplayed();
first_frame = true;
emit FirstFrameDisplayed();
}
}