Compare commits


2 Commits

Author SHA1 Message Date
Subv
0d6eafe11a NvDrv/nvhost-as-gpu: Ensure that the object passed to MapBufferEx has already been allocated.
Also added a consistency check and a comment for the case when the object id is different from its handle. The real nvservices doesn't make a distinction between ids and handles; each object gets a unique handle, which doubles as its id.
2018-04-23 11:21:46 -05:00
Subv
e4bd0bddea Nvdrv/nvhost-as-gpu: Implemented the ioctl REMAP command.
It takes a GPU memory address previously reserved with AllocateSpace and maps it to the address of the nvmap object passed to Remap.
2018-04-23 11:21:46 -05:00
5 changed files with 100 additions and 89 deletions

View File

@@ -27,6 +27,11 @@ u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vecto
case IoctlCommand::IocGetVaRegionsCommand:
return GetVARegions(input, output);
}
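// REMAP's input size depends on how many entries the caller passes (see Remap
// below), so it is matched on the command number alone rather than in the
// fixed-size switch above.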
if (static_cast<IoctlCommand>(command.cmd.Value()) == IoctlCommand::IocRemapCommand)
return Remap(input, output);
UNIMPLEMENTED_MSG("Unimplemented ioctl command");
return 0;
}
@@ -56,6 +61,36 @@ u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>&
return 0;
}
u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output) {
size_t num_entries = input.size() / sizeof(IoctlRemapEntry);
NGLOG_WARNING(Service_NVDRV, "(STUBBED) called, num_entries=0x{:X}", num_entries);
std::vector<IoctlRemapEntry> entries(num_entries);
std::memcpy(entries.data(), input.data(), input.size());
auto& gpu = Core::System::GetInstance().GPU();
for (const auto& entry : entries) {
NGLOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
entry.offset, entry.nvmap_handle, entry.pages);
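// Entry offsets and sizes are expressed in units of 0x10000-byte (big) pages.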
Tegra::GPUVAddr offset = static_cast<Tegra::GPUVAddr>(entry.offset) << 0x10;
auto object = nvmap_dev->GetObject(entry.nvmap_handle);
ASSERT(object);
ASSERT(object->status == nvmap::Object::Status::Allocated);
u64 size = static_cast<u64>(entry.pages) << 0x10;
ASSERT(size <= object->size);
Tegra::GPUVAddr returned = gpu.memory_manager->MapBufferEx(object->addr, offset, size);
ASSERT(returned == offset);
}
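// Write the entries back to the caller unmodified.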
std::memcpy(output.data(), entries.data(), output.size());
return 0;
}
u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBufferEx params{};
std::memcpy(&params, input.data(), input.size());
@@ -73,6 +108,16 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou
auto object = nvmap_dev->GetObject(params.nvmap_handle);
ASSERT(object);
// We can only map objects that have already been assigned a CPU address.
ASSERT(object->status == nvmap::Object::Status::Allocated);
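// Mapping at a nonzero offset into the object is not supported yet.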
ASSERT(params.buffer_offset == 0);
// The real nvservices doesn't make a distinction between handles and ids: an
// object can only have one handle, and it will be the same as its id. Assert that
// this is the case to prevent unexpected behavior.
ASSERT(object->id == params.nvmap_handle);
auto& gpu = Core::System::GetInstance().GPU();
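// Flag bit 0 (fixed_offset): map at the GPU virtual address supplied by the caller.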
if (params.flags & 1) {

View File

@@ -26,6 +26,7 @@ private:
enum class IoctlCommand : u32_le {
IocInitalizeExCommand = 0x40284109,
IocAllocateSpaceCommand = 0xC0184102,
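// Unlike the commands above, REMAP is declared as just the command number, since
// its payload size varies with the entry count.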
IocRemapCommand = 0x00000014,
IocMapBufferExCommand = 0xC0284106,
IocBindChannelCommand = 0x40044101,
IocGetVaRegionsCommand = 0xC0404108,
@@ -54,6 +55,16 @@ private:
};
static_assert(sizeof(IoctlAllocSpace) == 24, "IoctlAllocSpace is incorrect size");
struct IoctlRemapEntry {
u16_le flags;
u16_le kind;
u32_le nvmap_handle;
INSERT_PADDING_WORDS(1);
u32_le offset;
u32_le pages;
};
static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size");
struct IoctlMapBufferEx {
u32_le flags; // bit0: fixed_offset, bit2: cacheable
u32_le kind; // -1 is default
@@ -91,6 +102,7 @@ private:
u32 InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output);
u32 AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output);
u32 Remap(const std::vector<u8>& input, std::vector<u8>& output);
u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
u32 BindChannel(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetVARegions(const std::vector<u8>& input, std::vector<u8>& output);
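As a quick check on the 20-byte layout asserted above, the field offsets can be reproduced with plain fixed-width types. A standalone sketch, with uint16_t/uint32_t standing in for the emulator's u16_le/u32_le aliases and an explicit member in place of INSERT_PADDING_WORDS:

#include <cstddef>
#include <cstdint>

// Standalone mirror of IoctlRemapEntry, for layout verification only.
struct RemapEntry {
    uint16_t flags;
    uint16_t kind;
    uint32_t nvmap_handle;
    uint32_t padding; // INSERT_PADDING_WORDS(1)
    uint32_t offset;  // GPU VA, in 0x10000-byte pages
    uint32_t pages;   // size, in 0x10000-byte pages
};
static_assert(sizeof(RemapEntry) == 20, "RemapEntry is incorrect size");
static_assert(offsetof(RemapEntry, nvmap_handle) == 4, "nvmap_handle is misplaced");
static_assert(offsetof(RemapEntry, offset) == 12, "offset is misplaced");
static_assert(offsetof(RemapEntry, pages) == 16, "pages is misplaced");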

View File

@@ -500,11 +500,6 @@ public:
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) |
start_low);
}
bool IsEnabled() const {
return enable != 0 && StartAddress() != 0;
}
} vertex_array[NumVertexArrays];
Blend blend;

View File

@@ -127,8 +127,7 @@ RasterizerOpenGL::~RasterizerOpenGL() {
}
}
std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
GLintptr buffer_offset) {
void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
MICROPROFILE_SCOPE(OpenGL_VAO);
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
@@ -137,59 +136,43 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
state.draw.vertex_buffer = stream_buffer->GetHandle();
state.Apply();
// Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
const auto& vertex_array = regs.vertex_array[index];
if (!vertex_array.IsEnabled())
continue;
const Tegra::GPUVAddr start = vertex_array.StartAddress();
const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end > start);
u64 size = end - start + 1;
// Copy vertex array data
const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(start)};
res_cache.FlushRegion(data_addr, size, nullptr);
Memory::ReadBlock(data_addr, array_ptr, size);
// Bind the vertex array to the buffer at the current offset.
glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride);
ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented");
array_ptr += size;
buffer_offset += size;
// TODO(bunnei): Add support for 1+ vertex arrays
const auto& vertex_array{regs.vertex_array[0]};
const auto& vertex_array_limit{regs.vertex_array_limit[0]};
ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?");
ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!");
for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) {
ASSERT_MSG(!regs.vertex_array[index].enable, "vertex array %d is unimplemented!", index);
}
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
// Enables the first 16 vertex attributes always, as we don't know which ones are actually used
// until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now
// to avoid OpenGL errors.
// TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
// assume every shader uses them all.
for (unsigned index = 0; index < 16; ++index) {
auto& attrib = regs.vertex_attrib_format[index];
NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
attrib.offset.Value(), attrib.IsNormalized());
auto& buffer = regs.vertex_array[attrib.buffer];
ASSERT(buffer.IsEnabled());
glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride,
reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset));
glEnableVertexAttribArray(index);
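// Separate attribute format (ARB_vertex_attrib_binding): declare the format once
// per attribute, then attach it to vertex buffer binding point attrib.buffer.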
glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
glVertexAttribBinding(index, attrib.buffer);
hw_vao_enabled_attributes[index] = true;
}
return {array_ptr, buffer_offset};
// Copy vertex array data
const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1};
const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())};
res_cache.FlushRegion(data_addr, data_size, nullptr);
Memory::ReadBlock(data_addr, array_ptr, data_size);
array_ptr += data_size;
buffer_offset += data_size;
}
void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) {
// Helper function for uploading uniform data
const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
if (has_ARB_direct_state_access) {
@@ -207,6 +190,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
u32 current_constbuffer_bindpoint = 0;
for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
ptr_pos += sizeof(GLShader::MaxwellUniformData);
auto& shader_config = gpu.regs.shader_config[index];
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -220,16 +205,13 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
}
// Upload uniform data as one UBO per stage
const GLintptr ubo_offset = buffer_offset;
const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
copy_buffer(uniform_buffers[stage].handle, ubo_offset,
sizeof(GLShader::MaxwellUniformData));
GLShader::MaxwellUniformData* ub_ptr =
reinterpret_cast<GLShader::MaxwellUniformData*>(buffer_ptr);
reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]);
ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]);
buffer_ptr += sizeof(GLShader::MaxwellUniformData);
buffer_offset += sizeof(GLShader::MaxwellUniformData);
// Fetch program code from memory
GLShader::ProgramCode program_code;
const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
@@ -270,24 +252,6 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
shader_program_manager->UseTrivialGeometryShader();
}
size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (!regs.vertex_array[index].IsEnabled())
continue;
const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress();
const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end > start);
size += end - start + 1;
}
return size;
}
bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
DrawArrays();
@@ -365,49 +329,44 @@ void RasterizerOpenGL::DrawArrays() {
const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count};
// TODO(bunnei): Add support for 1+ vertex arrays
vs_input_size = vertex_num * regs.vertex_array[0].stride;
state.draw.vertex_buffer = stream_buffer->GetHandle();
state.Apply();
size_t buffer_size = CalculateVertexArraysSize();
size_t buffer_size = static_cast<size_t>(vs_input_size);
if (is_indexed) {
buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size;
buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size;
}
// Uniform space for the 5 shader stages
buffer_size = Common::AlignUp<size_t>(buffer_size, 4) +
sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage;
buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage;
size_t ptr_pos = 0;
u8* buffer_ptr;
GLintptr buffer_offset;
std::tie(buffer_ptr, buffer_offset) =
stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4);
u8* offseted_buffer;
std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
offseted_buffer =
reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
SetupVertexArray(buffer_ptr, buffer_offset);
ptr_pos += vs_input_size;
// If indexed mode, copy the index buffer
GLintptr index_buffer_offset = 0;
if (is_indexed) {
ptr_pos = Common::AlignUp(ptr_pos, 4);
const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
const VAddr index_data_addr{
memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())};
Memory::ReadBlock(index_data_addr, offseted_buffer, index_buffer_size);
Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size);
index_buffer_offset = buffer_offset;
offseted_buffer += index_buffer_size;
buffer_offset += index_buffer_size;
index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
ptr_pos += index_buffer_size;
}
offseted_buffer =
reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
SetupShaders(offseted_buffer, buffer_offset);
SetupShaders(buffer_ptr, buffer_offset, ptr_pos);
stream_buffer->Unmap();
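With CalculateVertexArraysSize removed, each draw now packs vertex data, optionally 4-byte-aligned index data, and one uniform block per shader stage into a single mapped stream-buffer range. A rough sketch of the size computation above, with AlignUp standing in for Common::AlignUp and the function name being illustrative:

#include <cstddef>

// Round value up to a power-of-two alignment, mirroring Common::AlignUp.
constexpr size_t AlignUp(size_t value, size_t align) {
    return (value + align - 1) & ~(align - 1);
}

// Per-draw stream-buffer size: vertex data first, then (when indexed) the
// 4-byte-aligned index data, then one uniform block per shader stage.
constexpr size_t DrawBufferSize(size_t vs_input_size, size_t index_size,
                                bool is_indexed, size_t uniform_block_size,
                                size_t num_stages) {
    size_t size = vs_input_size;
    if (is_indexed)
        size = AlignUp(size, 4) + index_size;
    return size + uniform_block_size * num_stages;
}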

View File

@@ -148,13 +148,13 @@ private:
static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024;
std::unique_ptr<OGLStreamBuffer> stream_buffer;
size_t CalculateVertexArraysSize() const;
GLsizeiptr vs_input_size;
std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset);
void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset);
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers;
void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset);
void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos);
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw;