Compare commits

..

19 Commits

Author SHA1 Message Date
Lioncash
be7dad5e7e service/vi: Update IManagerDisplayService's function table
Amends it to add the 7.0.0+ CreateStrayLayer function.
2019-02-25 08:09:00 -05:00
bunnei
c07987dfab Merge pull request #2118 from FernandoS27/ipa-improve
shader_decompiler: Improve Accuracy of Attribute Interpolation.
2019-02-24 23:04:22 -05:00
bunnei
c4243c07cc Merge pull request #2119 from FernandoS27/fix-copy
rasterizer_cache_gl: Only do fast layered copy on the same format.
2019-02-24 23:03:52 -05:00
bunnei
c6170565b5 Merge pull request #2155 from FearlessTobi/port-4655
Port citra-emu/citra#4655: "Remove GCC version checks"
2019-02-24 23:03:13 -05:00
bunnei
57985fb16a Merge pull request #2144 from lioncash/factor
service/vi: Convert Display and Layer structs into classes
2019-02-24 23:02:50 -05:00
tgsm
030814b1cb Remove GCC version checks
Citra can't be compiled using GCC <7 because of required C++17 support, so these version checks don't need to exist anymore.
2019-02-24 15:24:06 +01:00
bunnei
90c780e6f3 Merge pull request #2139 from degasus/dma_pusher
video_core/dma_pusher: The full list of headers at once.
2019-02-24 04:15:49 -05:00
bunnei
f7090bacc5 Merge pull request #2146 from ReinUsesLisp/vulkan-scheduler
vk_scheduler: Implement a scheduler
2019-02-23 23:32:43 -05:00
bunnei
d062991643 Merge pull request #2150 from ReinUsesLisp/fixup-layer-swizzle
gl_rasterizer_cache: Fixup parameter order in layered swizzle
2019-02-23 23:31:34 -05:00
bunnei
4ab978d670 Merge pull request #2151 from ReinUsesLisp/fixup-vk-memory-manager
vk_memory_manager: Fixup commit interval allocation
2019-02-23 23:29:53 -05:00
ReinUsesLisp
92050c4d86 vk_memory_manager: Fixup commit interval allocation
VKMemoryCommitImpl was using as the end of its interval "begin + end".
That ended up wasting memory.
2019-02-24 01:04:41 -03:00
ReinUsesLisp
abef11a540 gl_rasterizer_cache: Fixup parameter order in layered swizzle 2019-02-23 23:27:30 -03:00
ReinUsesLisp
f546fb35ed vk_scheduler: Implement a scheduler
The scheduler abstracts command buffer and fence management with an
interface that's able to do OpenGL-like operations on Vulkan command
buffers.

It returns by value a command buffer and fence that have to be used for
subsequent operations until Flush or Finish is executed, after that the
current execution context (the pair of command buffers and fences) gets
invalidated a new one must be fetched. Thankfully validation layers will
quickly detect if this is skipped throwing an error due to modifications
to a sent command buffer.
2019-02-22 01:33:32 -03:00
bunnei
94b27bb8a5 Merge pull request #2138 from ReinUsesLisp/vulkan-memory-manager
vk_memory_manager: Implement memory manager
2019-02-21 22:26:54 -05:00
Markus Wick
6dd40976d0 video_core/dma_pusher: Simplyfy Step() logic.
As fetching command list headers and and the list of command headers is a fixed 1:1 relation now, they can be implemented within a single call.
This cleans up the Step() logic quite a bit.
2019-02-19 10:28:42 +01:00
Markus Wick
717394c980 video_core/dma_pusher: The full list of headers at once.
Fetching every u32 from memory leads to a big overhead. So let's fetch all of them as a block if possible.
This reduces the Memory::* calls by the dma_pusher by a factor of 10.
2019-02-19 09:58:38 +01:00
ReinUsesLisp
b675c97cdd vk_memory_manager: Implement memory manager
A memory manager object handles the memory allocations for a device. It
allocates chunks of Vulkan memory objects and then suballocates.
2019-02-19 03:42:28 -03:00
Fernando Sahmkow
10682ad7e0 shader_decompiler: Improve Accuracy of Attribute Interpolation. 2019-02-14 03:25:07 -04:00
Fernando Sahmkow
bb41683394 rasterizer_cache_gl: Only do fast layered copy on the same format. As
glCopyImageSubData does not support different formats.
2019-02-13 16:55:00 -04:00
17 changed files with 591 additions and 72 deletions

View File

@@ -28,8 +28,8 @@
#include <cstring>
#include "common/common_types.h"
// GCC 4.6+
#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
// GCC
#ifdef __GNUC__
#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +38,7 @@
#endif
// LLVM/clang
#elif __clang__
#elif defined(__clang__)
#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1

View File

@@ -752,6 +752,7 @@ public:
{1102, nullptr, "GetDisplayResolution"},
{2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
{2011, nullptr, "DestroyManagedLayer"},
{2012, nullptr, "CreateStrayLayer"},
{2050, nullptr, "CreateIndirectLayer"},
{2051, nullptr, "DestroyIndirectLayer"},
{2052, nullptr, "CreateIndirectProducerEndPoint"},

View File

@@ -106,8 +106,12 @@ if (ENABLE_VULKAN)
renderer_vulkan/declarations.h
renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h
renderer_vulkan/vk_memory_manager.cpp
renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h)
renderer_vulkan/vk_resource_manager.h
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h)
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)

View File

@@ -33,18 +33,36 @@ void DmaPusher::DispatchCalls() {
}
bool DmaPusher::Step() {
if (dma_get != dma_put) {
// Push buffer non-empty, read a word
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
ASSERT_MSG(address, "Invalid GPU address");
if (!ib_enable || dma_pushbuffer.empty()) {
// pushbuffer empty and IB empty or nonexistent - nothing to do
return false;
}
const CommandHeader command_header{Memory::Read32(*address)};
const CommandList& command_list{dma_pushbuffer.front()};
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
GPUVAddr dma_get = command_list_header.addr;
GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
bool non_main = command_list_header.is_non_main;
dma_get += sizeof(u32);
if (dma_pushbuffer_subindex >= command_list.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
}
if (!non_main) {
dma_mget = dma_get;
}
if (command_list_header.size == 0) {
return true;
}
// Push buffer non-empty, read a word
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
ASSERT_MSG(address, "Invalid GPU address");
command_headers.resize(command_list_header.size);
Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
for (const CommandHeader& command_header : command_headers) {
// now, see if we're in the middle of a command
if (dma_state.length_pending) {
@@ -91,22 +109,11 @@ bool DmaPusher::Step() {
break;
}
}
} else if (ib_enable && !dma_pushbuffer.empty()) {
// Current pushbuffer empty, but we have more IB entries to read
const CommandList& command_list{dma_pushbuffer.front()};
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
dma_get = command_list_header.addr;
dma_put = dma_get + command_list_header.size * sizeof(u32);
non_main = command_list_header.is_non_main;
}
if (dma_pushbuffer_subindex >= command_list.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
}
} else {
// Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
return {};
if (!non_main) {
// TODO (degasus): This is dead code, as dma_mget is never read.
dma_mget = dma_put;
}
return true;

View File

@@ -75,6 +75,8 @@ private:
GPU& gpu;
std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
@@ -89,11 +91,8 @@ private:
DmaState dma_state{};
bool dma_increment_once{};
GPUVAddr dma_put{}; ///< pushbuffer current end address
GPUVAddr dma_get{}; ///< pushbuffer current read address
GPUVAddr dma_mget{}; ///< main pushbuffer last read address
bool ib_enable{true}; ///< IB mode enabled
bool non_main{}; ///< non-main pushbuffer active
};
} // namespace Tegra

View File

@@ -376,9 +376,9 @@ enum class R2pMode : u64 {
};
enum class IpaInterpMode : u64 {
Linear = 0,
Perspective = 1,
Flat = 2,
Pass = 0,
Multiply = 1,
Constant = 2,
Sc = 3,
};

View File

@@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
TriangleStrip = 7,
};
enum class AttributeUse : u8 {
Unused = 0,
Constant = 1,
Perspective = 2,
ScreenLinear = 3,
};
// Documentation in:
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
struct Header {
@@ -84,9 +91,15 @@ struct Header {
} vtg;
struct {
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
union {
BitField<0, 2, AttributeUse> x;
BitField<2, 2, AttributeUse> y;
BitField<4, 2, AttributeUse> w;
BitField<6, 2, AttributeUse> z;
u8 raw;
} imap_generic_vector[32];
INSERT_PADDING_BYTES(2); // ImapColor
INSERT_PADDING_BYTES(2); // ImapSystemValuesC
INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -103,6 +116,28 @@ struct Header {
const u32 bit = render_target * 4 + component;
return omap.target & (1 << bit);
}
AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
return static_cast<AttributeUse>(
(imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
}
AttributeUse GetAttributeUse(u32 attribute) const {
AttributeUse result = AttributeUse::Unused;
for (u32 i = 0; i < 4; i++) {
const auto index = GetAttributeIndexUse(attribute, i);
if (index == AttributeUse::Unused) {
continue;
}
if (result == AttributeUse::Unused || result == index) {
result = index;
continue;
}
LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
if (index == AttributeUse::Perspective) {
result = index;
}
}
return result;
}
} ps;
};

View File

@@ -423,7 +423,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
for (u32 i = 0; i < params.depth; i++) {
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), params.tile_width_spacing, 1,
params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
offset += layer_size;
offset_gl += gl_size;
@@ -1257,7 +1257,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
FastLayeredCopySurface(old_surface, new_surface);
if (old_params.pixel_format == new_params.pixel_format)
FastLayeredCopySurface(old_surface, new_surface);
else {
AccurateCopySurface(old_surface, new_surface);
}
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",

View File

@@ -20,6 +20,7 @@
namespace OpenGL::GLShader {
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::Header;
using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
@@ -288,34 +289,22 @@ private:
code.AddNewLine();
}
std::string GetInputFlags(const IpaMode& input_mode) {
const IpaSampleMode sample_mode = input_mode.sampling_mode;
const IpaInterpMode interp_mode = input_mode.interpolation_mode;
std::string GetInputFlags(AttributeUse attribute) {
std::string out;
switch (interp_mode) {
case IpaInterpMode::Flat:
switch (attribute) {
case AttributeUse::Constant:
out += "flat ";
break;
case IpaInterpMode::Linear:
case AttributeUse::ScreenLinear:
out += "noperspective ";
break;
case IpaInterpMode::Perspective:
case AttributeUse::Perspective:
// Default, Smooth
break;
default:
UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
}
switch (sample_mode) {
case IpaSampleMode::Centroid:
// It can be implemented with the "centroid " keyword in GLSL
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
break;
case IpaSampleMode::Default:
// Default, n/a
break;
default:
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
UNREACHABLE();
}
return out;
}
@@ -324,16 +313,11 @@ private:
const auto& attributes = ir.GetInputAttributes();
for (const auto element : attributes) {
const Attribute::Index index = element.first;
const IpaMode& input_mode = *element.second.begin();
if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
// Skip when it's not a generic attribute
continue;
}
ASSERT(element.second.size() > 0);
UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
"Multiple input flag modes are not supported in GLSL");
// TODO(bunnei): Use proper number of elements for these
u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
if (stage != ShaderStage::Vertex) {
@@ -345,8 +329,14 @@ private:
if (stage == ShaderStage::Geometry) {
attr = "gs_" + attr + "[]";
}
code.AddLine("layout (location = " + std::to_string(idx) + ") " +
GetInputFlags(input_mode) + "in vec4 " + attr + ';');
std::string suffix;
if (stage == ShaderStage::Fragment) {
const auto input_mode =
header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
suffix = GetInputFlags(input_mode);
}
code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
attr + ';');
}
if (!attributes.empty())
code.AddNewLine();
@@ -1584,4 +1574,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
return {decompiler.GetResult(), decompiler.GetShaderEntries()};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader

View File

@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
layout (location = 0) in vec4 position;
layout (location = 0) in noperspective vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
return {out, program.second};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader

View File

@@ -0,0 +1,252 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <optional>
#include <tuple>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
namespace Vulkan {
// TODO(Rodrigo): Fine tune this number
constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
class VKMemoryAllocation final {
public:
explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
: device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
shifted_type{ShiftType(type)}, is_mappable{properties &
vk::MemoryPropertyFlagBits::eHostVisible} {
if (is_mappable) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
}
}
~VKMemoryAllocation() {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
if (is_mappable)
dev.unmapMemory(memory, dld);
dev.free(memory, nullptr, dld);
}
VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
if (!found) {
found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
if (!found) {
// Signal out of memory, it'll try to do more allocations.
return nullptr;
}
}
u8* address = is_mappable ? base_address + *found : nullptr;
auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
*found + commit_size);
commits.push_back(commit.get());
// Last commit's address is highly probable to be free.
free_iterator = *found + commit_size;
return commit;
}
void Free(const VKMemoryCommitImpl* commit) {
ASSERT(commit);
const auto it =
std::find_if(commits.begin(), commits.end(),
[&](const auto& stored_commit) { return stored_commit == commit; });
if (it == commits.end()) {
LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
UNREACHABLE();
return;
}
commits.erase(it);
}
/// Returns whether this allocation is compatible with the arguments.
bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const {
return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) &&
(type_mask & shifted_type) != 0;
}
private:
static constexpr u32 ShiftType(u32 type) {
return 1U << type;
}
/// A memory allocator, it may return a free region between "start" and "end" with the solicited
/// requeriments.
std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
u64 iterator = start;
while (iterator + size < end) {
const u64 try_left = Common::AlignUp(iterator, alignment);
const u64 try_right = try_left + size;
bool overlap = false;
for (const auto& commit : commits) {
const auto [commit_left, commit_right] = commit->interval;
if (try_left < commit_right && commit_left < try_right) {
// There's an overlap, continue the search where the overlapping commit ends.
iterator = commit_right;
overlap = true;
break;
}
}
if (!overlap) {
// A free address has been found.
return try_left;
}
}
// No free regions where found, return an empty optional.
return std::nullopt;
}
const VKDevice& device; ///< Vulkan device.
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
const u64 alloc_size; ///< Size of this allocation.
const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
const bool is_mappable; ///< Whether the allocation is mappable.
/// Base address of the mapped pointer.
u8* base_address{};
/// Hints where the next free region is likely going to be.
u64 free_iterator{};
/// Stores all commits done from this allocation.
std::vector<const VKMemoryCommitImpl*> commits;
};
VKMemoryManager::VKMemoryManager(const VKDevice& device)
: device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
is_memory_unified{GetMemoryUnified(props)} {}
VKMemoryManager::~VKMemoryManager() = default;
VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
// When a host visible commit is asked, search for host visible and coherent, otherwise search
// for a fast device local type.
const vk::MemoryPropertyFlags wanted_properties =
host_visible
? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
: vk::MemoryPropertyFlagBits::eDeviceLocal;
const auto TryCommit = [&]() -> VKMemoryCommit {
for (auto& alloc : allocs) {
if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
continue;
if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
return commit;
}
}
return {};
};
if (auto commit = TryCommit(); commit) {
return commit;
}
// Commit has failed, allocate more memory.
if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
// TODO(Rodrigo): Try to use host memory.
LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
UNREACHABLE();
}
// Commit again, this time it won't fail since there's a fresh allocation above. If it does,
// there's a bug.
auto commit = TryCommit();
ASSERT(commit);
return commit;
}
VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
auto commit = Commit(requeriments, host_visible);
dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const auto requeriments = dev.getImageMemoryRequirements(image, dld);
auto commit = Commit(requeriments, host_visible);
dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
u64 size) {
const u32 type = [&]() {
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
const auto flags = props.memoryTypes[type_index].propertyFlags;
if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
// The type matches in type and in the wanted properties.
return type_index;
}
}
LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
UNREACHABLE();
return 0u;
}();
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
// Try to allocate found type.
const vk::MemoryAllocateInfo memory_ai(size, type);
vk::DeviceMemory memory;
if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
res != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
return false;
}
allocs.push_back(
std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
return true;
}
/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
// Memory is considered unified when heaps are device local only.
return false;
}
}
return true;
}
VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
u8* data, u64 begin, u64 end)
: allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {}
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
allocation->Free(this);
}
u8* VKMemoryCommitImpl::GetData() const {
ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
return data;
}
} // namespace Vulkan

View File

@@ -0,0 +1,87 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
class VKMemoryAllocation;
class VKMemoryCommitImpl;
using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
class VKMemoryManager final {
public:
explicit VKMemoryManager(const VKDevice& device);
~VKMemoryManager();
/**
* Commits a memory with the specified requeriments.
* @param reqs Requeriments returned from a Vulkan call.
* @param host_visible Signals the allocator that it *must* use host visible and coherent
* memory. When passing false, it will try to allocate device local memory.
* @returns A memory commit.
*/
VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
/// Commits memory required by the buffer and binds it.
VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible);
/// Commits memory required by the image and binds it.
VKMemoryCommit Commit(vk::Image image, bool host_visible);
/// Returns true if the memory allocations are done always in host visible and coherent memory.
bool IsMemoryUnified() const {
return is_memory_unified;
}
private:
/// Allocates a chunk of memory.
bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
/// Returns true if the device uses an unified memory model.
static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
const VKDevice& device; ///< Device handler.
const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
const bool is_memory_unified; ///< True if memory model is unified.
std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
};
class VKMemoryCommitImpl final {
friend VKMemoryAllocation;
public:
explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
u64 begin, u64 end);
~VKMemoryCommitImpl();
/// Returns the writeable memory map. The commit has to be mappable.
u8* GetData() const;
/// Returns the Vulkan memory handler.
vk::DeviceMemory GetMemory() const {
return memory;
}
/// Returns the start position of the commit relative to the allocation.
vk::DeviceSize GetOffset() const {
return static_cast<vk::DeviceSize>(interval.first);
}
private:
std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
vk::DeviceMemory memory; ///< Vulkan device memory handler.
VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
};
} // namespace Vulkan

View File

@@ -0,0 +1,60 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
: device{device}, resource_manager{resource_manager} {
next_fence = &resource_manager.CommitFence();
AllocateNewContext();
}
VKScheduler::~VKScheduler() = default;
VKExecutionContext VKScheduler::GetExecutionContext() const {
return VKExecutionContext(current_fence, current_cmdbuf);
}
VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
SubmitExecution(semaphore);
current_fence->Release();
AllocateNewContext();
return GetExecutionContext();
}
VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) {
SubmitExecution(semaphore);
current_fence->Wait();
current_fence->Release();
AllocateNewContext();
return GetExecutionContext();
}
void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
const auto& dld = device.GetDispatchLoader();
current_cmdbuf.end(dld);
const auto queue = device.GetGraphicsQueue();
const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
&semaphore);
queue.submit({submit_info}, *current_fence, dld);
}
void VKScheduler::AllocateNewContext() {
current_fence = next_fence;
current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
next_fence = &resource_manager.CommitFence();
const auto& dld = device.GetDispatchLoader();
current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
}
} // namespace Vulkan

View File

@@ -0,0 +1,69 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
class VKExecutionContext;
class VKFence;
class VKResourceManager;
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class VKScheduler {
public:
explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
~VKScheduler();
/// Gets the current execution context.
[[nodiscard]] VKExecutionContext GetExecutionContext() const;
/// Sends the current execution context to the GPU. It invalidates the current execution context
/// and returns a new one.
VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete. It invalidates
/// the current execution context and returns a new one.
VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);
private:
void SubmitExecution(vk::Semaphore semaphore);
void AllocateNewContext();
const VKDevice& device;
VKResourceManager& resource_manager;
vk::CommandBuffer current_cmdbuf;
VKFence* current_fence = nullptr;
VKFence* next_fence = nullptr;
};
class VKExecutionContext {
friend class VKScheduler;
public:
VKExecutionContext() = default;
VKFence& GetFence() const {
return *fence;
}
vk::CommandBuffer GetCommandBuffer() const {
return cmdbuf;
}
private:
explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
: fence{fence}, cmdbuf{cmdbuf} {}
VKFence* fence{};
vk::CommandBuffer cmdbuf;
};
} // namespace Vulkan

View File

@@ -48,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
Tegra::Shader::IpaSampleMode::Default};
u64 next_element = instr.attribute.fmt20.element;

View File

@@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
instr.ipa.sample_mode.Value()};
const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);
Node value = attr;
const Tegra::Shader::Attribute::Index index = attribute.index.Value();
if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
index <= Tegra::Shader::Attribute::Index::Attribute_31) {
// TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
// In theory by setting them as perspective, OpenGL does the perspective correction.
// A way must figured to reverse the last step of it.
if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
}
}
value = GetSaturatedFloat(value, instr.ipa.saturate);
SetRegister(bb, instr.gpr0, value);
break;
@@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader