Compare commits

...

4 Commits

Author SHA1 Message Date
bunnei
ca0557be52 Merge 3c7619ce84 into 066d6184d4 2018-07-01 15:41:42 +00:00
bunnei
3c7619ce84 memory_manager: Allow GpuToCpuAddress to return a zero address.
- Rationale: It's up to the caller to handle this or assert.
2018-06-30 02:32:33 -04:00
bunnei
ee67baba48 gl_rasterizer_cache: Track page counts in CPU virtual addresses. 2018-06-30 02:09:34 -04:00
bunnei
b614f9d96f gl_rasterizer: Use GPU virtual addr when possible for framebuffers. 2018-06-30 01:48:22 -04:00
14 changed files with 70 additions and 86 deletions

View File

@@ -19,14 +19,20 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform) {
VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
NGLOG_WARNING(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
addr, offset, width, height, stride, format);
const auto& object{nvmap_dev->GetObject(buffer_handle)};
ASSERT(object);
ASSERT(object->status == nvmap::Object::Status::Allocated);
using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
const Tegra::FramebufferConfig framebuffer{
addr, offset, width, height, stride, static_cast<PixelFormat>(format), transform};
const Tegra::FramebufferConfig framebuffer{object->cpu_addr,
object->gpu_addr,
offset,
width,
height,
stride,
static_cast<PixelFormat>(format),
transform};
Core::System::GetInstance().perf_stats.EndGameFrame();

View File

@@ -85,8 +85,8 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
u64 size = static_cast<u64>(entry.pages) << 0x10;
ASSERT(size <= object->size);
Tegra::GPUVAddr returned = gpu.memory_manager->MapBufferEx(object->addr, offset, size);
ASSERT(returned == offset);
object->gpu_addr = gpu.memory_manager->MapBufferEx(object->cpu_addr, offset, size);
ASSERT(object->gpu_addr == offset);
}
std::memcpy(output.data(), entries.data(), output.size());
return 0;
@@ -122,10 +122,12 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou
auto& gpu = Core::System::GetInstance().GPU();
if (params.flags & 1) {
params.offset = gpu.memory_manager->MapBufferEx(object->addr, params.offset, object->size);
object->gpu_addr =
gpu.memory_manager->MapBufferEx(object->cpu_addr, params.offset, object->size);
} else {
params.offset = gpu.memory_manager->MapBufferEx(object->addr, object->size);
object->gpu_addr = gpu.memory_manager->MapBufferEx(object->cpu_addr, object->size);
}
params.offset = object->gpu_addr;
// Create a new mapping entry for this operation.
ASSERT_MSG(buffer_mappings.find(params.offset) == buffer_mappings.end(),

View File

@@ -11,13 +11,6 @@
namespace Service::Nvidia::Devices {
VAddr nvmap::GetObjectAddress(u32 handle) const {
auto object = GetObject(handle);
ASSERT(object);
ASSERT(object->status == Object::Status::Allocated);
return object->addr;
}
u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::Create:
@@ -70,7 +63,7 @@ u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
object->flags = params.flags;
object->align = params.align;
object->kind = params.kind;
object->addr = params.addr;
object->cpu_addr = params.addr;
object->status = Object::Status::Allocated;
NGLOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.addr);

View File

@@ -11,6 +11,7 @@
#include "common/common_types.h"
#include "common/swap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
#include "video_core/memory_manager.h"
namespace Service::Nvidia::Devices {
@@ -19,9 +20,6 @@ public:
nvmap() = default;
~nvmap() override = default;
/// Returns the allocated address of an nvmap object given its handle.
VAddr GetObjectAddress(u32 handle) const;
u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
/// Represents an nvmap object.
@@ -32,7 +30,8 @@ public:
u32 flags;
u32 align;
u8 kind;
VAddr addr;
VAddr cpu_addr;
Tegra::GPUVAddr gpu_addr;
Status status;
u32 refcount;
};

View File

@@ -332,8 +332,8 @@ u8* GetPhysicalPointer(PAddr address) {
return target_pointer;
}
void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached) {
if (gpu_addr == 0) {
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
if (vaddr == 0) {
return;
}
@@ -342,19 +342,8 @@ void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached)
// CPU pages, hence why we iterate on a CPU page basis (note: GPU page size is different). This
// assumes the specified GPU address region is contiguous as well.
u64 num_pages = ((gpu_addr + size - 1) >> PAGE_BITS) - (gpu_addr >> PAGE_BITS) + 1;
for (unsigned i = 0; i < num_pages; ++i, gpu_addr += PAGE_SIZE) {
boost::optional<VAddr> maybe_vaddr =
Core::System::GetInstance().GPU().memory_manager->GpuToCpuAddress(gpu_addr);
// The GPU <-> CPU virtual memory mapping is not 1:1
if (!maybe_vaddr) {
NGLOG_ERROR(HW_Memory,
"Trying to flush a cached region to an invalid physical address {:016X}",
gpu_addr);
continue;
}
VAddr vaddr = *maybe_vaddr;
u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
if (cached) {

View File

@@ -266,7 +266,7 @@ enum class FlushMode {
/**
* Mark each page touching the region as cached.
*/
void RasterizerMarkRegionCached(Tegra::GPUVAddr start, u64 size, bool cached);
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
/**
* Flushes and invalidates any externally cached rasterizer resources touching the given virtual

View File

@@ -49,7 +49,8 @@ struct FramebufferConfig {
UNREACHABLE();
}
VAddr address;
VAddr cpu_addr;
Tegra::GPUVAddr gpu_addr;
u32 offset;
u32 width;
u32 height;

View File

@@ -100,9 +100,10 @@ boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
boost::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
VAddr base_addr = PageSlot(gpu_addr);
ASSERT(base_addr != static_cast<u64>(PageStatus::Unmapped));
if (base_addr == static_cast<u64>(PageStatus::Allocated)) {
switch (static_cast<PageStatus>(base_addr)) {
case PageStatus::Unmapped:
case PageStatus::Allocated:
return {};
}

View File

@@ -51,8 +51,9 @@ public:
}
/// Attempt to use a faster method to display the framebuffer to screen
virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info) {
virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
Tegra::GPUVAddr framebuffer_addr, u32 pixel_stride,
ScreenInfo& screen_info) {
return false;
}

View File

@@ -482,7 +482,7 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) {
}
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride,
Tegra::GPUVAddr framebuffer_addr, u32 pixel_stride,
ScreenInfo& screen_info) {
if (!framebuffer_addr) {
return {};

View File

@@ -36,8 +36,9 @@ public:
bool AccelerateDisplayTransfer(const void* config) override;
bool AccelerateTextureCopy(const void* config) override;
bool AccelerateFill(const void* config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, VAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer,
Tegra::GPUVAddr framebuffer_addr, u32 pixel_stride,
ScreenInfo& screen_info) override;
bool AccelerateDrawBatch(bool is_indexed) override;
/// OpenGL shader generated for a given Maxwell register state

View File

@@ -34,8 +34,10 @@ struct FormatTuple {
/*static*/ SurfaceParams SurfaceParams::CreateForTexture(
const Tegra::Texture::FullTextureInfo& config) {
const auto& gpu{Core::System::GetInstance().GPU()};
SurfaceParams params{};
params.addr = config.tic.Address();
params.cpu_addr = gpu.memory_manager->GpuToCpuAddress(params.addr).get_value_or(0);
params.is_tiled = config.tic.IsTiled();
params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
params.pixel_format = PixelFormatFromTextureFormat(config.tic.format);
@@ -51,8 +53,10 @@ struct FormatTuple {
/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) {
const auto& gpu{Core::System::GetInstance().GPU()};
SurfaceParams params{};
params.addr = config.Address();
params.cpu_addr = *gpu.memory_manager->GpuToCpuAddress(params.addr);
params.is_tiled = true;
params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
@@ -102,11 +106,6 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
return {};
}
VAddr SurfaceParams::GetCpuAddr() const {
const auto& gpu = Core::System::GetInstance().GPU();
return *gpu.memory_manager->GpuToCpuAddress(addr);
}
static bool IsPixelFormatASTC(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
@@ -235,7 +234,7 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64
void CachedSurface::LoadGLBuffer() {
ASSERT(params.type != SurfaceType::Fill);
u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr());
u8* const texture_src_data = Memory::GetPointer(params.cpu_addr);
ASSERT(texture_src_data);
@@ -261,7 +260,7 @@ void CachedSurface::LoadGLBuffer() {
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
void CachedSurface::FlushGLBuffer() {
u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr());
u8* const dst_buffer = Memory::GetPointer(params.cpu_addr);
ASSERT(dst_buffer);
ASSERT(gl_buffer.size() ==
@@ -457,7 +456,7 @@ void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) {
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
if (params.addr == 0 || params.height * params.width == 0) {
if (params.cpu_addr == 0 || params.height * params.width == 0) {
return {};
}
@@ -480,19 +479,16 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
return surface;
}
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
// Tries to find the GPU address of a framebuffer based on the CPU address. This is because
// final output framebuffers are specified by CPU address, but internally our GPU cache uses
// GPU addresses. We iterate through all cached framebuffers, and compare their starting CPU
// address to the one provided. This is obviously not great, and won't work if the
// framebuffer overlaps surfaces.
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(Tegra::GPUVAddr gpu_addr) const {
// Tries to find a framebuffer based on a GPU address. We iterate through all cached
// framebuffers, and compare their starting GPU address to the one provided. This is obviously
// not great, and won't work if the framebuffer overlaps surfaces.
std::vector<Surface> surfaces;
for (const auto& surface : surface_cache) {
const auto& params = surface.second->GetSurfaceParams();
const VAddr surface_cpu_addr = params.GetCpuAddr();
if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) {
ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported");
if (gpu_addr >= params.addr && gpu_addr < (params.addr + params.size_in_bytes)) {
ASSERT_MSG(gpu_addr == params.addr, "overlapping surfaces are unsupported");
surfaces.push_back(surface.second);
}
}
@@ -534,7 +530,7 @@ void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
}
surface_cache[surface_key] = surface;
UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
UpdatePagesCachedCount(params.cpu_addr, params.size_in_bytes, 1);
}
void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
@@ -547,7 +543,7 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
return;
}
UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1);
UpdatePagesCachedCount(params.cpu_addr, params.size_in_bytes, -1);
surface_cache.erase(search);
}
@@ -556,10 +552,10 @@ constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval));
}
void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) -
(addr >> Tegra::MemoryManager::PAGE_BITS) + 1;
const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS;
void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
const u64 num_pages =
((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1;
const u64 page_start = addr >> Memory::PAGE_BITS;
const u64 page_end = page_start + num_pages;
// Interval maps will erase segments if count reaches 0, so if delta is negative we have to
@@ -572,10 +568,8 @@ void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 siz
const auto interval = pair.first & pages_interval;
const int count = pair.second;
const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval)
<< Tegra::MemoryManager::PAGE_BITS;
const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval)
<< Tegra::MemoryManager::PAGE_BITS;
const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS;
const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS;
const u64 interval_size = interval_end_addr - interval_start_addr;
if (delta > 0 && count == delta)

View File

@@ -267,9 +267,6 @@ struct SurfaceParams {
GetFormatBpp(pixel_format) / CHAR_BIT;
}
/// Returns the CPU virtual address for this surface
VAddr GetCpuAddr() const;
/// Returns true if the specified region overlaps with this surface's region in Switch memory
bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const {
return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes);
@@ -283,6 +280,7 @@ struct SurfaceParams {
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
Tegra::GPUVAddr addr;
VAddr cpu_addr;
bool is_tiled;
u32 block_height;
PixelFormat pixel_format;
@@ -361,7 +359,7 @@ public:
void MarkSurfaceAsDirty(const Surface& surface);
/// Tries to find a framebuffer surface based on the provided GPU address
Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
Surface TryFindFramebufferSurface(Tegra::GPUVAddr gpu_addr) const;
/// Write any cached resources overlapping the region back to memory (if dirty)
void FlushRegion(Tegra::GPUVAddr addr, size_t size);
@@ -380,7 +378,7 @@ private:
void UnregisterSurface(const Surface& surface);
/// Increase/decrease the number of surfaces in pages touching the specified region
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
void UpdatePagesCachedCount(VAddr addr, u64 size, int delta);
std::unordered_map<SurfaceKey, Surface> surface_cache;
PageMap cached_pages;

View File

@@ -137,8 +137,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
ScreenInfo& screen_info) {
const u32 bytes_per_pixel{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)};
const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
// Framebuffer orientation handling
framebuffer_transform_flags = framebuffer.transform_flags;
@@ -146,17 +144,18 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
// only allows rows to have a memory alignment of 4.
ASSERT(framebuffer.stride % 4 == 0);
if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride,
screen_info)) {
if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer.gpu_addr + framebuffer.offset,
framebuffer.stride, screen_info)) {
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
Memory::FlushMode::Flush);
const VAddr cpu_addr{framebuffer.cpu_addr + framebuffer.offset};
Memory::RasterizerFlushVirtualRegion(cpu_addr, size_in_bytes, Memory::FlushMode::Flush);
VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4,
Memory::GetPointer(framebuffer_addr),
gl_framebuffer_data.data(), true);
Memory::GetPointer(cpu_addr), gl_framebuffer_data.data(),
true);
state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
state.Apply();