Compare commits

...

4 Commits

Author SHA1 Message Date
bunnei
ca0557be52 Merge 3c7619ce84 into 066d6184d4 2018-07-01 15:41:42 +00:00
bunnei
3c7619ce84 memory_manager: Allow GpuToCpuAddress to return a zero address.
- Rationale: It's up to the caller to handle this or assert.
2018-06-30 02:32:33 -04:00
bunnei
ee67baba48 gl_rasterizer_cache: Track page counts in CPU virtual addresses. 2018-06-30 02:09:34 -04:00
bunnei
b614f9d96f gl_rasterizer: Use GPU virtual addr when possible for framebuffers. 2018-06-30 01:48:22 -04:00
14 changed files with 70 additions and 86 deletions

View File

@@ -19,14 +19,20 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform) {
VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
NGLOG_WARNING(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
addr, offset, width, height, stride, format);
const auto& object{nvmap_dev->GetObject(buffer_handle)};
ASSERT(object);
ASSERT(object->status == nvmap::Object::Status::Allocated);
using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
const Tegra::FramebufferConfig framebuffer{
addr, offset, width, height, stride, static_cast<PixelFormat>(format), transform};
const Tegra::FramebufferConfig framebuffer{object->cpu_addr,
object->gpu_addr,
offset,
width,
height,
stride,
static_cast<PixelFormat>(format),
transform};
Core::System::GetInstance().perf_stats.EndGameFrame();

View File

@@ -85,8 +85,8 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
u64 size = static_cast<u64>(entry.pages) << 0x10;
ASSERT(size <= object->size);
Tegra::GPUVAddr returned = gpu.memory_manager->MapBufferEx(object->addr, offset, size);
ASSERT(returned == offset);
object->gpu_addr = gpu.memory_manager->MapBufferEx(object->cpu_addr, offset, size);
ASSERT(object->gpu_addr == offset);
}
std::memcpy(output.data(), entries.data(), output.size());
return 0;
@@ -122,10 +122,12 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou
auto& gpu = Core::System::GetInstance().GPU();
if (params.flags & 1) {
params.offset = gpu.memory_manager->MapBufferEx(object->addr, params.offset, object->size);
object->gpu_addr =
gpu.memory_manager->MapBufferEx(object->cpu_addr, params.offset, object->size);
} else {
params.offset = gpu.memory_manager->MapBufferEx(object->addr, object->size);
object->gpu_addr = gpu.memory_manager->MapBufferEx(object->cpu_addr, object->size);
}
params.offset = object->gpu_addr;
// Create a new mapping entry for this operation.
ASSERT_MSG(buffer_mappings.find(params.offset) == buffer_mappings.end(),

View File

@@ -11,13 +11,6 @@
namespace Service::Nvidia::Devices {
VAddr nvmap::GetObjectAddress(u32 handle) const {
auto object = GetObject(handle);
ASSERT(object);
ASSERT(object->status == Object::Status::Allocated);
return object->addr;
}
u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::Create:
@@ -70,7 +63,7 @@ u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
object->flags = params.flags;
object->align = params.align;
object->kind = params.kind;
object->addr = params.addr;
object->cpu_addr = params.addr;
object->status = Object::Status::Allocated;
NGLOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.addr);

View File

@@ -11,6 +11,7 @@
#include "common/common_types.h"
#include "common/swap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
#include "video_core/memory_manager.h"
namespace Service::Nvidia::Devices {
@@ -19,9 +20,6 @@ public:
nvmap() = default;
~nvmap() override = default;
/// Returns the allocated address of an nvmap object given its handle.
VAddr GetObjectAddress(u32 handle) const;
u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
/// Represents an nvmap object.
@@ -32,7 +30,8 @@ public:
u32 flags;
u32 align;
u8 kind;
VAddr addr;
VAddr cpu_addr;
Tegra::GPUVAddr gpu_addr;
Status status;
u32 refcount;
};

View File

@@ -332,8 +332,8 @@ u8* GetPhysicalPointer(PAddr address) {
return target_pointer;
}
void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached) {
if (gpu_addr == 0) {
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
if (vaddr == 0) {
return;
}
@@ -342,19 +342,8 @@ void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached)
// CPU pages, hence why we iterate on a CPU page basis (note: GPU page size is different). This
// assumes the specified GPU address region is contiguous as well.
u64 num_pages = ((gpu_addr + size - 1) >> PAGE_BITS) - (gpu_addr >> PAGE_BITS) + 1;
for (unsigned i = 0; i < num_pages; ++i, gpu_addr += PAGE_SIZE) {
boost::optional<VAddr> maybe_vaddr =
Core::System::GetInstance().GPU().memory_manager->GpuToCpuAddress(gpu_addr);
// The GPU <-> CPU virtual memory mapping is not 1:1
if (!maybe_vaddr) {
NGLOG_ERROR(HW_Memory,
"Trying to flush a cached region to an invalid physical address {:016X}",
gpu_addr);
continue;
}
VAddr vaddr = *maybe_vaddr;
u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
if (cached) {

View File

@@ -266,7 +266,7 @@ enum class FlushMode {
/**
* Mark each page touching the region as cached.
*/
void RasterizerMarkRegionCached(Tegra::GPUVAddr start, u64 size, bool cached);
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
/**
* Flushes and invalidates any externally cached rasterizer resources touching the given virtual

View File

@@ -49,7 +49,8 @@ struct FramebufferConfig {
UNREACHABLE();
}
VAddr address;
VAddr cpu_addr;
Tegra::GPUVAddr gpu_addr;
u32 offset;
u32 width;
u32 height;

View File

@@ -100,9 +100,10 @@ boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
boost::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
VAddr base_addr = PageSlot(gpu_addr);
ASSERT(base_addr != static_cast<u64>(PageStatus::Unmapped));
if (base_addr == static_cast<u64>(PageStatus::Allocated)) {
switch (static_cast<PageStatus>(base_addr)) {
case PageStatus::Unmapped:
case PageStatus::Allocated:
return {};
}

View File

@@ -51,8 +51,9 @@ public:
}
/// Attempt to use a faster method to display the framebuffer to screen
virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info) {
virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
Tegra::GPUVAddr framebuffer_addr, u32 pixel_stride,
ScreenInfo& screen_info) {
return false;
}

View File

@@ -482,7 +482,7 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) {
}
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride,
Tegra::GPUVAddr framebuffer_addr, u32 pixel_stride,
ScreenInfo& screen_info) {
if (!framebuffer_addr) {
return {};

View File

@@ -36,8 +36,9 @@ public:
bool AccelerateDisplayTransfer(const void* config) override;
bool AccelerateTextureCopy(const void* config) override;
bool AccelerateFill(const void* config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, VAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer,
Tegra::GPUVAddr framebuffer_addr, u32 pixel_stride,
ScreenInfo& screen_info) override;
bool AccelerateDrawBatch(bool is_indexed) override;
/// OpenGL shader generated for a given Maxwell register state

View File

@@ -34,8 +34,10 @@ struct FormatTuple {
/*static*/ SurfaceParams SurfaceParams::CreateForTexture(
const Tegra::Texture::FullTextureInfo& config) {
const auto& gpu{Core::System::GetInstance().GPU()};
SurfaceParams params{};
params.addr = config.tic.Address();
params.cpu_addr = gpu.memory_manager->GpuToCpuAddress(params.addr).get_value_or(0);
params.is_tiled = config.tic.IsTiled();
params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
params.pixel_format = PixelFormatFromTextureFormat(config.tic.format);
@@ -51,8 +53,10 @@ struct FormatTuple {
/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) {
const auto& gpu{Core::System::GetInstance().GPU()};
SurfaceParams params{};
params.addr = config.Address();
params.cpu_addr = *gpu.memory_manager->GpuToCpuAddress(params.addr);
params.is_tiled = true;
params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
@@ -102,11 +106,6 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
return {};
}
VAddr SurfaceParams::GetCpuAddr() const {
const auto& gpu = Core::System::GetInstance().GPU();
return *gpu.memory_manager->GpuToCpuAddress(addr);
}
static bool IsPixelFormatASTC(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
@@ -235,7 +234,7 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64
void CachedSurface::LoadGLBuffer() {
ASSERT(params.type != SurfaceType::Fill);
u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr());
u8* const texture_src_data = Memory::GetPointer(params.cpu_addr);
ASSERT(texture_src_data);
@@ -261,7 +260,7 @@ void CachedSurface::LoadGLBuffer() {
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
void CachedSurface::FlushGLBuffer() {
u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr());
u8* const dst_buffer = Memory::GetPointer(params.cpu_addr);
ASSERT(dst_buffer);
ASSERT(gl_buffer.size() ==
@@ -457,7 +456,7 @@ void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) {
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
if (params.addr == 0 || params.height * params.width == 0) {
if (params.cpu_addr == 0 || params.height * params.width == 0) {
return {};
}
@@ -480,19 +479,16 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
return surface;
}
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
// Tries to find the GPU address of a framebuffer based on the CPU address. This is because
// final output framebuffers are specified by CPU address, but internally our GPU cache uses
// GPU addresses. We iterate through all cached framebuffers, and compare their starting CPU
// address to the one provided. This is obviously not great, and won't work if the
// framebuffer overlaps surfaces.
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(Tegra::GPUVAddr gpu_addr) const {
// Tries to find a framebuffer based on a GPU address. We iterate through all cached
// framebuffers, and compare their starting GPU address to the one provided. This is obviously
// not great, and won't work if the framebuffer overlaps surfaces.
std::vector<Surface> surfaces;
for (const auto& surface : surface_cache) {
const auto& params = surface.second->GetSurfaceParams();
const VAddr surface_cpu_addr = params.GetCpuAddr();
if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) {
ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported");
if (gpu_addr >= params.addr && gpu_addr < (params.addr + params.size_in_bytes)) {
ASSERT_MSG(gpu_addr == params.addr, "overlapping surfaces are unsupported");
surfaces.push_back(surface.second);
}
}
@@ -534,7 +530,7 @@ void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
}
surface_cache[surface_key] = surface;
UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
UpdatePagesCachedCount(params.cpu_addr, params.size_in_bytes, 1);
}
void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
@@ -547,7 +543,7 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
return;
}
UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1);
UpdatePagesCachedCount(params.cpu_addr, params.size_in_bytes, -1);
surface_cache.erase(search);
}
@@ -556,10 +552,10 @@ constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval));
}
void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) -
(addr >> Tegra::MemoryManager::PAGE_BITS) + 1;
const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS;
void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
const u64 num_pages =
((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1;
const u64 page_start = addr >> Memory::PAGE_BITS;
const u64 page_end = page_start + num_pages;
// Interval maps will erase segments if count reaches 0, so if delta is negative we have to
@@ -572,10 +568,8 @@ void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 siz
const auto interval = pair.first & pages_interval;
const int count = pair.second;
const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval)
<< Tegra::MemoryManager::PAGE_BITS;
const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval)
<< Tegra::MemoryManager::PAGE_BITS;
const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS;
const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS;
const u64 interval_size = interval_end_addr - interval_start_addr;
if (delta > 0 && count == delta)

View File

@@ -267,9 +267,6 @@ struct SurfaceParams {
GetFormatBpp(pixel_format) / CHAR_BIT;
}
/// Returns the CPU virtual address for this surface
VAddr GetCpuAddr() const;
/// Returns true if the specified region overlaps with this surface's region in Switch memory
bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const {
return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes);
@@ -283,6 +280,7 @@ struct SurfaceParams {
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
Tegra::GPUVAddr addr;
VAddr cpu_addr;
bool is_tiled;
u32 block_height;
PixelFormat pixel_format;
@@ -361,7 +359,7 @@ public:
void MarkSurfaceAsDirty(const Surface& surface);
/// Tries to find a framebuffer surface based on the provided GPU address
Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
Surface TryFindFramebufferSurface(Tegra::GPUVAddr gpu_addr) const;
/// Write any cached resources overlapping the region back to memory (if dirty)
void FlushRegion(Tegra::GPUVAddr addr, size_t size);
@@ -380,7 +378,7 @@ private:
void UnregisterSurface(const Surface& surface);
/// Increase/decrease the number of surfaces in pages touching the specified region
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
void UpdatePagesCachedCount(VAddr addr, u64 size, int delta);
std::unordered_map<SurfaceKey, Surface> surface_cache;
PageMap cached_pages;

View File

@@ -137,8 +137,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
ScreenInfo& screen_info) {
const u32 bytes_per_pixel{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)};
const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
// Framebuffer orientation handling
framebuffer_transform_flags = framebuffer.transform_flags;
@@ -146,17 +144,18 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
// only allows rows to have a memory alignment of 4.
ASSERT(framebuffer.stride % 4 == 0);
if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride,
screen_info)) {
if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer.gpu_addr + framebuffer.offset,
framebuffer.stride, screen_info)) {
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
Memory::FlushMode::Flush);
const VAddr cpu_addr{framebuffer.cpu_addr + framebuffer.offset};
Memory::RasterizerFlushVirtualRegion(cpu_addr, size_in_bytes, Memory::FlushMode::Flush);
VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4,
Memory::GetPointer(framebuffer_addr),
gl_framebuffer_data.data(), true);
Memory::GetPointer(cpu_addr), gl_framebuffer_data.data(),
true);
state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
state.Apply();