service/vi: Update IManagerDisplayService's function table

Amends it to add the 7.0.0+ CreateStrayLayer function.
Merge pull request #2118 from FernandoS27/ipa-improve
2019-02-25 08:09:00 -05:00 · 2019-02-24 23:04:22 -05:00 · 2019-02-24 23:03:52 -05:00 · 2019-02-24 23:03:13 -05:00 · 2019-02-24 23:02:50 -05:00 · 2019-02-24 15:24:06 +01:00
17 changed files with 591 additions and 72 deletions
--- a/externals/Vulkan-Headers
+++ b/externals/Vulkan-Headers
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -28,8 +28,8 @@
 #include <cstring>
 #include "common/common_types.h"

-// GCC 4.6+
-#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+// GCC
+#ifdef __GNUC__

 #if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
 #define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +38,7 @@
 #endif

 // LLVM/clang
-#elif __clang__
+#elif defined(__clang__)

 #if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
 #define COMMON_LITTLE_ENDIAN 1
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -752,6 +752,7 @@ public:
            {1102, nullptr, "GetDisplayResolution"},
            {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
            {2011, nullptr, "DestroyManagedLayer"},
+            {2012, nullptr, "CreateStrayLayer"},
            {2050, nullptr, "CreateIndirectLayer"},
            {2051, nullptr, "DestroyIndirectLayer"},
            {2052, nullptr, "CreateIndirectProducerEndPoint"},
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -106,8 +106,12 @@ if (ENABLE_VULKAN)
        renderer_vulkan/declarations.h
        renderer_vulkan/vk_device.cpp
        renderer_vulkan/vk_device.h
+        renderer_vulkan/vk_memory_manager.cpp
+        renderer_vulkan/vk_memory_manager.h
        renderer_vulkan/vk_resource_manager.cpp
-        renderer_vulkan/vk_resource_manager.h)
+        renderer_vulkan/vk_resource_manager.h
+        renderer_vulkan/vk_scheduler.cpp
+        renderer_vulkan/vk_scheduler.h)

    target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -33,18 +33,36 @@ void DmaPusher::DispatchCalls() {
 }

 bool DmaPusher::Step() {
-    if (dma_get != dma_put) {
-        // Push buffer non-empty, read a word
-        const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
-        ASSERT_MSG(address, "Invalid GPU address");
+    if (!ib_enable || dma_pushbuffer.empty()) {
+        // pushbuffer empty and IB empty or nonexistent - nothing to do
+        return false;
+    }

-        const CommandHeader command_header{Memory::Read32(*address)};
+    const CommandList& command_list{dma_pushbuffer.front()};
+    const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
+    GPUVAddr dma_get = command_list_header.addr;
+    GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
+    bool non_main = command_list_header.is_non_main;

-        dma_get += sizeof(u32);
+    if (dma_pushbuffer_subindex >= command_list.size()) {
+        // We've gone through the current list, remove it from the queue
+        dma_pushbuffer.pop();
+        dma_pushbuffer_subindex = 0;
+    }

-        if (!non_main) {
-            dma_mget = dma_get;
-        }
+    if (command_list_header.size == 0) {
+        return true;
+    }
+
+    // Push buffer non-empty, read a word
+    const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
+    ASSERT_MSG(address, "Invalid GPU address");
+
+    command_headers.resize(command_list_header.size);
+
+    Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
+
+    for (const CommandHeader& command_header : command_headers) {

        // now, see if we're in the middle of a command
        if (dma_state.length_pending) {
@@ -91,22 +109,11 @@ bool DmaPusher::Step() {
                break;
            }
        }
-    } else if (ib_enable && !dma_pushbuffer.empty()) {
-        // Current pushbuffer empty, but we have more IB entries to read
-        const CommandList& command_list{dma_pushbuffer.front()};
-        const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
-        dma_get = command_list_header.addr;
-        dma_put = dma_get + command_list_header.size * sizeof(u32);
-        non_main = command_list_header.is_non_main;
+    }

-        if (dma_pushbuffer_subindex >= command_list.size()) {
-            // We've gone through the current list, remove it from the queue
-            dma_pushbuffer.pop();
-            dma_pushbuffer_subindex = 0;
-        }
-    } else {
-        // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
-        return {};
+    if (!non_main) {
+        // TODO (degasus): This is dead code, as dma_mget is never read.
+        dma_mget = dma_put;
    }

    return true;
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -75,6 +75,8 @@ private:

    GPU& gpu;

+    std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
+
    std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
    std::size_t dma_pushbuffer_subindex{};  ///< Index within a command list within the pushbuffer

@@ -89,11 +91,8 @@ private:
    DmaState dma_state{};
    bool dma_increment_once{};

-    GPUVAddr dma_put{};   ///< pushbuffer current end address
-    GPUVAddr dma_get{};   ///< pushbuffer current read address
    GPUVAddr dma_mget{};  ///< main pushbuffer last read address
    bool ib_enable{true}; ///< IB mode enabled
-    bool non_main{};      ///< non-main pushbuffer active
 };

 } // namespace Tegra
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -376,9 +376,9 @@ enum class R2pMode : u64 {
 };

 enum class IpaInterpMode : u64 {
-    Linear = 0,
-    Perspective = 1,
-    Flat = 2,
+    Pass = 0,
+    Multiply = 1,
+    Constant = 2,
    Sc = 3,
 };

--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
    TriangleStrip = 7,
 };

+enum class AttributeUse : u8 { // 2-bit usage of one component, packed in imap_generic_vector
+    Unused = 0, // ignored by GetAttributeUse when merging the component uses
+    Constant = 1, // declared "flat" by the OpenGL decompiler
+    Perspective = 2, // GLSL default (smooth), no qualifier is emitted
+    ScreenLinear = 3, // declared "noperspective" by the OpenGL decompiler
+};
+
 // Documentation in:
 // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
 struct Header {
@@ -84,9 +91,15 @@ struct Header {
        } vtg;

        struct {
-            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
-            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
-            INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES(3); // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1); // ImapSystemValuesB
+            union {
+                BitField<0, 2, AttributeUse> x;
+                BitField<2, 2, AttributeUse> y;
+                BitField<4, 2, AttributeUse> w;
+                BitField<6, 2, AttributeUse> z;
+                u8 raw;
+            } imap_generic_vector[32];
            INSERT_PADDING_BYTES(2);  // ImapColor
            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
            INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -103,6 +116,28 @@ struct Header {
                const u32 bit = render_target * 4 + component;
                return omap.target & (1 << bit);
            }
+            AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
+                const u32 packed = imap_generic_vector[attribute].raw; // 2 bits per component
+                return static_cast<AttributeUse>((packed >> (index * 2)) & 0x03);
+            }
+            AttributeUse GetAttributeUse(u32 attribute) const {
+                // Fold the four component uses into one; Unused components never vote.
+                AttributeUse merged = AttributeUse::Unused;
+                for (u32 component = 0; component < 4; ++component) {
+                    const AttributeUse use = GetAttributeIndexUse(attribute, component);
+                    if (use == AttributeUse::Unused || use == merged) {
+                        continue;
+                    }
+                    if (merged == AttributeUse::Unused) {
+                        merged = use;
+                        continue;
+                    }
+                    // Two used components disagree: report it and let Perspective win.
+                    LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
+                    merged = use == AttributeUse::Perspective ? use : merged;
+                }
+                return merged;
+            }
        } ps;
    };

--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -423,7 +423,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
        for (u32 i = 0; i < params.depth; i++) {
            MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                          params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
-                          params.MipBlockDepth(mip_level), params.tile_width_spacing, 1,
+                          params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
                          gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
            offset += layer_size;
            offset_gl += gl_size;
@@ -1257,7 +1257,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
    case SurfaceTarget::TextureCubemap:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubeArray:
-        FastLayeredCopySurface(old_surface, new_surface);
+        if (old_params.pixel_format == new_params.pixel_format)
+            FastLayeredCopySurface(old_surface, new_surface);
+        else {
+            AccurateCopySurface(old_surface, new_surface);
+        }
        break;
    default:
        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -20,6 +20,7 @@
 namespace OpenGL::GLShader {

 using Tegra::Shader::Attribute;
+using Tegra::Shader::AttributeUse;
 using Tegra::Shader::Header;
 using Tegra::Shader::IpaInterpMode;
 using Tegra::Shader::IpaMode;
@@ -288,34 +289,22 @@ private:
        code.AddNewLine();
    }

-    std::string GetInputFlags(const IpaMode& input_mode) {
-        const IpaSampleMode sample_mode = input_mode.sampling_mode;
-        const IpaInterpMode interp_mode = input_mode.interpolation_mode;
+    std::string GetInputFlags(AttributeUse attribute) {
        std::string out;

-        switch (interp_mode) {
-        case IpaInterpMode::Flat:
+        switch (attribute) {
+        case AttributeUse::Constant:
            out += "flat ";
            break;
-        case IpaInterpMode::Linear:
+        case AttributeUse::ScreenLinear:
            out += "noperspective ";
            break;
-        case IpaInterpMode::Perspective:
+        case AttributeUse::Perspective:
            // Default, Smooth
            break;
        default:
-            UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
-        }
-        switch (sample_mode) {
-        case IpaSampleMode::Centroid:
-            // It can be implemented with the "centroid " keyword in GLSL
-            UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
-            break;
-        case IpaSampleMode::Default:
-            // Default, n/a
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
+            LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
+            UNREACHABLE();
        }
        return out;
    }
@@ -324,16 +313,11 @@ private:
        const auto& attributes = ir.GetInputAttributes();
        for (const auto element : attributes) {
            const Attribute::Index index = element.first;
-            const IpaMode& input_mode = *element.second.begin();
            if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
                // Skip when it's not a generic attribute
                continue;
            }

-            ASSERT(element.second.size() > 0);
-            UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
-                                 "Multiple input flag modes are not supported in GLSL");
-
            // TODO(bunnei): Use proper number of elements for these
            u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
            if (stage != ShaderStage::Vertex) {
@@ -345,8 +329,14 @@ private:
            if (stage == ShaderStage::Geometry) {
                attr = "gs_" + attr + "[]";
            }
-            code.AddLine("layout (location = " + std::to_string(idx) + ") " +
-                         GetInputFlags(input_mode) + "in vec4 " + attr + ';');
+            std::string suffix;
+            if (stage == ShaderStage::Fragment) {
+                const auto input_mode =
+                    header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
+                suffix = GetInputFlags(input_mode);
+            }
+            code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
+                         attr + ';');
        }
        if (!attributes.empty())
            code.AddNewLine();
@@ -1584,4 +1574,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
    return {decompiler.GetResult(), decompiler.GetShaderEntries()};
 }

-} // namespace OpenGL::GLShader
+} // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
 layout (location = 6) out vec4 FragColor6;
 layout (location = 7) out vec4 FragColor7;

-layout (location = 0) in vec4 position;
+layout (location = 0) in noperspective vec4 position;

 layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
    vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
    return {out, program.second};
 }

-} // namespace OpenGL::GLShader
+} // namespace OpenGL::GLShader
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -0,0 +1,252 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <optional>
+#include <tuple>
+#include <vector>
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+
+namespace Vulkan {
+
+// TODO(Rodrigo): Fine tune this number
+constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
+
+class VKMemoryAllocation final {
+public:
+    explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
+                                vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
+        : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
+          shifted_type{ShiftType(type)}, is_mappable{properties &
+                                                     vk::MemoryPropertyFlagBits::eHostVisible} {
+        if (is_mappable) { // host visible memory is mapped once here and unmapped in the destructor
+            const auto dev = device.GetLogical();
+            const auto& dld = device.GetDispatchLoader();
+            base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
+        }
+    }
+
+    ~VKMemoryAllocation() {
+        const auto logical = device.GetLogical();
+        const auto& dispatch = device.GetDispatchLoader();
+        if (is_mappable) // drop the mapping created by the constructor before freeing
+            logical.unmapMemory(memory, dispatch);
+        logical.free(memory, nullptr, dispatch);
+    }
+
+    VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
+        const auto size = static_cast<u64>(commit_size);
+        const auto align = static_cast<u64>(alignment);
+
+        // Look past the hint first, then wrap around to the space before it.
+        auto offset = TryFindFreeSection(free_iterator, alloc_size, size, align);
+        if (!offset) {
+            offset = TryFindFreeSection(0, free_iterator, size, align);
+        }
+        if (!offset) {
+            // Nothing fits: a null commit tells the manager to try elsewhere or allocate more.
+            return nullptr;
+        }
+        const u64 begin = *offset;
+        const u64 end = begin + commit_size;
+        u8* const mapped = is_mappable ? base_address + begin : nullptr;
+        auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, mapped, begin, end);
+        commits.push_back(commit.get());
+        free_iterator = end; // the space right after the newest commit is most likely free
+        return commit;
+    }
+
+    void Free(const VKMemoryCommitImpl* commit) {
+        ASSERT(commit);
+        // Commits are tracked by address, so a plain pointer search finds the entry.
+        const auto entry = std::find(commits.begin(), commits.end(), commit);
+        if (entry != commits.end()) {
+            commits.erase(entry);
+            return;
+        }
+        // Reaching this point means the commit was never handed out by this allocation.
+        LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
+        UNREACHABLE();
+    }
+
+    /// Checks that a wanted property is present here and that type_mask allows this type.
+    bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const {
+        const bool type_allowed = (type_mask & shifted_type) != 0;
+        return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) && type_allowed;
+    }
+
+private:
+    static constexpr u32 ShiftType(u32 type) {
+        return 1U << type; // single-bit mask, tested against a type mask in IsCompatible
+    }
+
+    /// Searches [start, end) for a free "size"-byte region whose offset honours "alignment". The
+    /// candidate is aligned before the bounds test, so a returned region never extends past "end".
+    std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
+        u64 iterator = Common::AlignUp(start, alignment);
+        while (iterator + size <= end) {
+            const u64 try_left = iterator;
+            const u64 try_right = try_left + size;
+
+            bool overlap = false;
+            for (const auto& commit : commits) {
+                const auto [commit_left, commit_right] = commit->interval;
+                if (try_left < commit_right && commit_left < try_right) {
+                    // There's an overlap, continue the search where the overlapping commit ends.
+                    iterator = Common::AlignUp(commit_right, alignment);
+                    overlap = true;
+                    break;
+                }
+            }
+            if (!overlap) {
+                // A free address has been found.
+                return try_left;
+            }
+        }
+        // No free regions were found, return an empty optional.
+        return std::nullopt;
+    }
+
+    const VKDevice& device;                   ///< Vulkan device.
+    const vk::DeviceMemory memory;            ///< Vulkan memory allocation handler.
+    const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
+    const u64 alloc_size;                     ///< Size of this allocation.
+    const u32 shifted_type;                   ///< Stored Vulkan type of this allocation, shifted.
+    const bool is_mappable;                   ///< Whether the allocation is mappable.
+
+    /// Base address of the mapped pointer.
+    u8* base_address{};
+
+    /// Hints where the next free region is likely going to be.
+    u64 free_iterator{};
+
+    /// Stores all commits done from this allocation.
+    std::vector<const VKMemoryCommitImpl*> commits;
+};
+
+VKMemoryManager::VKMemoryManager(const VKDevice& device)
+    : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
+      is_memory_unified{GetMemoryUnified(props)} {} // props is declared, thus built, earlier
+
+VKMemoryManager::~VKMemoryManager() = default;
+
+VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
+    ASSERT(reqs.size < ALLOC_CHUNK_SIZE); // new chunks are always ALLOC_CHUNK_SIZE bytes
+
+    // When a host visible commit is requested, ask for host visible and coherent memory,
+    // otherwise ask for device local memory.
+    const vk::MemoryPropertyFlags wanted_properties =
+        host_visible
+            ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
+            : vk::MemoryPropertyFlagBits::eDeviceLocal;
+    // Walks the compatible allocations in order and returns the first commit that succeeds.
+    const auto TryCommit = [&]() -> VKMemoryCommit {
+        for (auto& alloc : allocs) {
+            if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
+                continue;
+
+            if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
+                return commit;
+            }
+        }
+        return {}; // null commit: no existing allocation could serve the request
+    };
+
+    if (auto commit = TryCommit(); commit) {
+        return commit;
+    }
+
+    // No existing allocation could serve the request, allocate a fresh chunk.
+    if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
+        // TODO(Rodrigo): Try to use host memory.
+        LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
+        UNREACHABLE();
+    }
+
+    // Commit again; with the fresh allocation above this is expected to succeed. If it doesn't,
+    // there's a bug.
+    auto commit = TryCommit();
+    ASSERT(commit);
+    return commit;
+}
+
+VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
+    const auto logical = device.GetLogical();
+    const auto& dispatch = device.GetDispatchLoader();
+    // Ask the driver what the buffer needs, reserve that memory and attach it to the buffer.
+    auto commit = Commit(logical.getBufferMemoryRequirements(buffer, dispatch), host_visible);
+    logical.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dispatch);
+    return commit;
+}
+
+VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
+    const auto logical = device.GetLogical();
+    const auto& dispatch = device.GetDispatchLoader();
+    // Ask the driver what the image needs, reserve that memory and attach it to the image.
+    auto commit = Commit(logical.getImageMemoryRequirements(image, dispatch), host_visible);
+    logical.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dispatch);
+    return commit;
+}
+
+/// Allocates "size" bytes from the first type in type_mask that has every wanted property.
+bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
+                                  u64 size) {
+    const u32 type = [&]() {
+        for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
+            const auto flags = props.memoryTypes[type_index].propertyFlags;
+            // Require all requested bits; one matching bit could pick non-coherent host memory.
+            if ((type_mask & (1U << type_index)) &&
+                (flags & wanted_properties) == wanted_properties) {
+                return type_index;
+            }
+        }
+        LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
+        UNREACHABLE();
+        return 0u;
+    }();
+
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    const vk::MemoryAllocateInfo memory_ai(size, type); // allocate from the type found above
+    vk::DeviceMemory memory;
+    if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
+        res != vk::Result::eSuccess) {
+        LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
+        return false;
+    }
+    allocs.push_back(
+        std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
+    return true;
+}
+
+/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
+    // Memory is considered unified when every single heap is device local.
+    bool unified = true;
+    for (u32 heap = 0; unified && heap < props.memoryHeapCount; ++heap) {
+        const auto heap_flags = props.memoryHeaps[heap].flags;
+        unified = static_cast<bool>(heap_flags & vk::MemoryHeapFlagBits::eDeviceLocal);
+    }
+    return unified;
+}
+
+VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
+                                       u8* data, u64 begin, u64 end)
+    : allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {}
+
+VKMemoryCommitImpl::~VKMemoryCommitImpl() {
+    allocation->Free(this); // removes this commit from the owning allocation's commit list
+}
+
+u8* VKMemoryCommitImpl::GetData() const { // data is null when the allocation isn't mappable
+    ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
+    return data;
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -0,0 +1,87 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <utility>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKMemoryAllocation;
+class VKMemoryCommitImpl;
+
+using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
+
+class VKMemoryManager final { // hands out commits that live inside larger device allocations
+public:
+    explicit VKMemoryManager(const VKDevice& device);
+    ~VKMemoryManager();
+
+    /**
+     * Commits memory that satisfies the specified requirements.
+     * @param reqs Requirements returned from a Vulkan call.
+     * @param host_visible Signals the allocator that it *must* use host visible and coherent
+     * memory. When passing false, it will try to allocate device local memory.
+     * @returns A memory commit.
+     */
+    VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
+
+    /// Commits memory required by the buffer and binds it.
+    VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible);
+
+    /// Commits memory required by the image and binds it.
+    VKMemoryCommit Commit(vk::Image image, bool host_visible);
+
+    /// Returns true if the device uses a unified memory model, as computed at construction.
+    bool IsMemoryUnified() const {
+        return is_memory_unified;
+    }
+
+private:
+    /// Allocates a chunk of memory and appends it to the current allocations.
+    bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
+
+    /// Returns true if the device uses a unified memory model.
+    static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
+
+    const VKDevice& device;                                  ///< Device handler.
+    const vk::PhysicalDeviceMemoryProperties props;          ///< Physical device properties.
+    const bool is_memory_unified;                            ///< True if memory model is unified.
+    std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
+};
+
+class VKMemoryCommitImpl final { // one committed interval inside a VKMemoryAllocation
+    friend VKMemoryAllocation; // the allocation reads "interval" when searching for free space
+
+public:
+    explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
+                                u64 begin, u64 end);
+    ~VKMemoryCommitImpl(); // notifies the owning allocation through Free()
+
+    /// Returns the writeable memory map. The commit has to be mappable.
+    u8* GetData() const;
+
+    /// Returns the Vulkan memory handler.
+    vk::DeviceMemory GetMemory() const {
+        return memory;
+    }
+
+    /// Returns the start position of the commit relative to the allocation.
+    vk::DeviceSize GetOffset() const {
+        return static_cast<vk::DeviceSize>(interval.first);
+    }
+
+private:
+    std::pair<u64, u64> interval{};   ///< Interval where the commit exists.
+    vk::DeviceMemory memory;          ///< Vulkan device memory handler.
+    VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
+    u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -0,0 +1,60 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Vulkan {
+
+VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
+    : device{device}, resource_manager{resource_manager},
+      next_fence{&resource_manager.CommitFence()} { // AllocateNewContext consumes this fence
+    AllocateNewContext();
+}
+
+VKScheduler::~VKScheduler() = default;
+
+VKExecutionContext VKScheduler::GetExecutionContext() const {
+    return VKExecutionContext(current_fence, current_cmdbuf); // copy of the current fence/cmdbuf
+}
+
+VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
+    SubmitExecution(semaphore); // unlike Finish, the fence is not waited on afterwards
+    current_fence->Release(); // NOTE(review): presumably returns the fence to its owner - confirm
+    AllocateNewContext();
+    return GetExecutionContext();
+}
+
+VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) {
+    SubmitExecution(semaphore);
+    current_fence->Wait(); // the only difference from Flush: wait on the fence before going on
+    current_fence->Release(); // NOTE(review): presumably returns the fence to its owner - confirm
+    AllocateNewContext();
+    return GetExecutionContext();
+}
+
+void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
+    const auto& dispatch = device.GetDispatchLoader();
+    current_cmdbuf.end(dispatch); // recording must be finished before the buffer is submitted
+    // Signal the semaphore only when the caller handed in a non-null one.
+    const u32 num_signals = semaphore ? 1u : 0u;
+    const vk::SubmitInfo info(0, nullptr, nullptr, 1, &current_cmdbuf, num_signals, &semaphore);
+    const auto graphics_queue = device.GetGraphicsQueue();
+    graphics_queue.submit({info}, *current_fence, dispatch);
+}
+
+void VKScheduler::AllocateNewContext() {
+    current_fence = next_fence; // promote the fence that was committed in advance
+    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
+    next_fence = &resource_manager.CommitFence(); // commit the fence for the following context
+    // The new command buffer starts recording immediately.
+    const auto& dld = device.GetDispatchLoader();
+    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -0,0 +1,69 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKExecutionContext;
+class VKFence;
+class VKResourceManager;
+
+/// The scheduler abstracts command buffer and fence management with an interface that's able to do
+/// OpenGL-like operations on Vulkan command buffers.
+class VKScheduler {
+public:
+    explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
+    ~VKScheduler();
+
+    /// Gets the current execution context.
+    [[nodiscard]] VKExecutionContext GetExecutionContext() const;
+
+    /// Sends the current execution context to the GPU. It invalidates the current execution context
+    /// and returns a new one.
+    VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);
+
+    /// Sends the current execution context to the GPU and waits for it to complete. It invalidates
+    /// the current execution context and returns a new one.
+    VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);
+
+private:
+    void SubmitExecution(vk::Semaphore semaphore); // ends and submits the current command buffer
+    /// Commits a new command buffer and fence pair and begins recording on it.
+    void AllocateNewContext();
+
+    const VKDevice& device;
+    VKResourceManager& resource_manager;
+    vk::CommandBuffer current_cmdbuf; // command buffer currently being recorded
+    VKFence* current_fence = nullptr; // fence passed to the submission of current_cmdbuf
+    VKFence* next_fence = nullptr; // fence committed ahead of time for the next context
+};
+
+class VKExecutionContext { // fence + command buffer pair handed out by VKScheduler
+    friend class VKScheduler; // only the scheduler can build a populated context
+
+public:
+    VKExecutionContext() = default; // leaves fence null; GetFence() must not be called then
+
+    VKFence& GetFence() const {
+        return *fence;
+    }
+
+    vk::CommandBuffer GetCommandBuffer() const {
+        return cmdbuf;
+    }
+
+private:
+    explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
+        : fence{fence}, cmdbuf{cmdbuf} {}
+
+    VKFence* fence{}; // not owned by this object
+    vk::CommandBuffer cmdbuf;
+};
+
+} // namespace Vulkan
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -48,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");

-        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
+        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
                                          Tegra::Shader::IpaSampleMode::Default};

        u64 next_element = instr.attribute.fmt20.element;
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
                                                instr.ipa.sample_mode.Value()};

        const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
-        const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);
+        Node value = attr;
+        const Tegra::Shader::Attribute::Index index = attribute.index.Value();
+        if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
+            index <= Tegra::Shader::Attribute::Index::Attribute_31) {
+            // TODO(Blinkhawk): There are cases where a perspective attribute uses PASS.
+            // In theory by setting them as perspective, OpenGL does the perspective correction.
+            // A way must be figured out to reverse the last step of it.
+            if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
+                value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
+            }
+        }
+        value = GetSaturatedFloat(value, instr.ipa.saturate);

        SetRegister(bb, instr.gpr0, value);
        break;
@@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
Author	SHA1	Message	Date
Lioncash	be7dad5e7e	service/vi: Update IManagerDisplayService's function table Amends it to add the 7.0.0+ CreateStrayLayer function.	2019-02-25 08:09:00 -05:00
bunnei	c07987dfab	Merge pull request #2118 from FernandoS27/ipa-improve shader_decompiler: Improve Accuracy of Attribute Interpolation.	2019-02-24 23:04:22 -05:00
bunnei	c4243c07cc	Merge pull request #2119 from FernandoS27/fix-copy rasterizer_cache_gl: Only do fast layered copy on the same format.	2019-02-24 23:03:52 -05:00
bunnei	c6170565b5	Merge pull request #2155 from FearlessTobi/port-4655 Port citra-emu/citra#4655: "Remove GCC version checks"	2019-02-24 23:03:13 -05:00
bunnei	57985fb16a	Merge pull request #2144 from lioncash/factor service/vi: Convert Display and Layer structs into classes	2019-02-24 23:02:50 -05:00
tgsm	030814b1cb	Remove GCC version checks Citra can't be compiled using GCC <7 because of required C++17 support, so these version checks don't need to exist anymore.	2019-02-24 15:24:06 +01:00
bunnei	90c780e6f3	Merge pull request #2139 from degasus/dma_pusher video_core/dma_pusher: The full list of headers at once.	2019-02-24 04:15:49 -05:00
bunnei	f7090bacc5	Merge pull request #2146 from ReinUsesLisp/vulkan-scheduler vk_scheduler: Implement a scheduler	2019-02-23 23:32:43 -05:00
bunnei	d062991643	Merge pull request #2150 from ReinUsesLisp/fixup-layer-swizzle gl_rasterizer_cache: Fixup parameter order in layered swizzle	2019-02-23 23:31:34 -05:00
bunnei	4ab978d670	Merge pull request #2151 from ReinUsesLisp/fixup-vk-memory-manager vk_memory_manager: Fixup commit interval allocation	2019-02-23 23:29:53 -05:00
ReinUsesLisp	92050c4d86	vk_memory_manager: Fixup commit interval allocation VKMemoryCommitImpl was using as the end of its interval "begin + end". That ended up wasting memory.	2019-02-24 01:04:41 -03:00
ReinUsesLisp	abef11a540	gl_rasterizer_cache: Fixup parameter order in layered swizzle	2019-02-23 23:27:30 -03:00
ReinUsesLisp	f546fb35ed	vk_scheduler: Implement a scheduler The scheduler abstracts command buffer and fence management with an interface that's able to do OpenGL-like operations on Vulkan command buffers. It returns by value a command buffer and fence that have to be used for subsequent operations until Flush or Finish is executed, after that the current execution context (the pair of command buffers and fences) gets invalidated a new one must be fetched. Thankfully validation layers will quickly detect if this is skipped throwing an error due to modifications to a sent command buffer.	2019-02-22 01:33:32 -03:00
bunnei	94b27bb8a5	Merge pull request #2138 from ReinUsesLisp/vulkan-memory-manager vk_memory_manager: Implement memory manager	2019-02-21 22:26:54 -05:00
Markus Wick	6dd40976d0	video_core/dma_pusher: Simplyfy Step() logic. As fetching command list headers and and the list of command headers is a fixed 1:1 relation now, they can be implemented within a single call. This cleans up the Step() logic quite a bit.	2019-02-19 10:28:42 +01:00
Markus Wick	717394c980	video_core/dma_pusher: The full list of headers at once. Fetching every u32 from memory leads to a big overhead. So let's fetch all of them as a block if possible. This reduces the Memory::* calls by the dma_pusher by a factor of 10.	2019-02-19 09:58:38 +01:00
ReinUsesLisp	b675c97cdd	vk_memory_manager: Implement memory manager A memory manager object handles the memory allocations for a device. It allocates chunks of Vulkan memory objects and then suballocates.	2019-02-19 03:42:28 -03:00
Fernando Sahmkow	10682ad7e0	shader_decompiler: Improve Accuracy of Attribute Interpolation.	2019-02-14 03:25:07 -04:00
Fernando Sahmkow	bb41683394	rasterizer_cache_gl: Only do fast layered copy on the same format. As glCopyImageSubData does not support different formats.	2019-02-13 16:55:00 -04:00