Compare commits

...

9 Commits

Author SHA1 Message Date
ReinUsesLisp
e3ec288568 gl_staging_buffer: Use glGetSynciv instead of glClientWaitSync
glGetSynciv is the intended API to query fence's signaled status.
2019-09-04 01:50:51 -03:00
ReinUsesLisp
58659a702d gl_staging_buffer: Add missing GL_CLIENT_STORAGE_BIT
Fixes a performance regression where the OpenGL server was transferring
data from server to client halting execution on texture usage up to 17ms
per frame.
2019-09-04 01:50:51 -03:00
ReinUsesLisp
b65871407e staging_buffer_cache: Remove [[nodiscard]] 2019-09-04 01:50:51 -03:00
ReinUsesLisp
f0161d2799 gl_texture_cache: clang-format fixes 2019-09-04 01:50:51 -03:00
ReinUsesLisp
6728b9b252 gl_staging_buffer: Move class declarations to cpp file and add commentaries 2019-09-04 01:50:51 -03:00
ReinUsesLisp
7ac5d2bac8 staging_buffer_cache: Disable OpenGL staging buffers for Intel proprietary drivers 2019-09-04 01:50:51 -03:00
ReinUsesLisp
256caec9ef surface_base: Minor style changes 2019-09-04 01:50:51 -03:00
ReinUsesLisp
f733682aa7 texture_cache: Implement asynchronous flushing 2019-09-04 01:50:51 -03:00
ReinUsesLisp
e6e75f1c12 staging_buffer_cache: Use OpenGL buffers to upload/download textures 2019-09-04 01:50:51 -03:00
12 changed files with 477 additions and 123 deletions

View File

@@ -64,6 +64,8 @@ add_library(video_core STATIC
renderer_opengl/gl_shader_manager.h
renderer_opengl/gl_shader_util.cpp
renderer_opengl/gl_shader_util.h
renderer_opengl/gl_staging_buffer.cpp
renderer_opengl/gl_staging_buffer.h
renderer_opengl/gl_state.cpp
renderer_opengl/gl_state.h
renderer_opengl/gl_stream_buffer.cpp
@@ -114,6 +116,7 @@ add_library(video_core STATIC
shader/shader_ir.cpp
shader/shader_ir.h
shader/track.cpp
staging_buffer_cache.h
surface.cpp
surface.h
texture_cache/surface_base.cpp

View File

@@ -4,6 +4,8 @@
#include <array>
#include <cstddef>
#include <string_view>
#include <glad/glad.h>
#include "common/logging/log.h"
@@ -23,6 +25,9 @@ T GetInteger(GLenum pname) {
} // Anonymous namespace
Device::Device() {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const bool intel_proprietary = vendor == "Intel";
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
@@ -32,6 +37,7 @@ Device::Device() {
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = TestComponentIndexingBug();
has_broken_pbo_streaming = intel_proprietary;
}
Device::Device(std::nullptr_t) {
@@ -42,6 +48,7 @@ Device::Device(std::nullptr_t) {
has_vertex_viewport_layer = true;
has_variable_aoffi = true;
has_component_indexing_bug = false;
has_broken_pbo_streaming = false;
}
bool Device::TestVariableAoffi() {

View File

@@ -46,6 +46,10 @@ public:
return has_component_indexing_bug;
}
/// Returns true when the device's PBO streaming is broken (set for Intel's
/// proprietary driver) and plain CPU staging buffers should be used instead.
bool HasBrokenPBOStreaming() const {
    return has_broken_pbo_streaming;
}
private:
static bool TestVariableAoffi();
static bool TestComponentIndexingBug();
@@ -58,6 +62,7 @@ private:
bool has_vertex_viewport_layer{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_broken_pbo_streaming{};
};
} // namespace OpenGL

View File

@@ -0,0 +1,169 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <glad/glad.h>
#include "common/assert.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_staging_buffer.h"
namespace OpenGL {
/// Staging buffer backed by a persistently mapped OpenGL buffer object (PBO).
/// The buffer is mapped once for its whole lifetime and synchronized with
/// OpenGL fence objects queued by its owner.
class PersistentStagingBuffer final : public StagingBuffer {
public:
    /// @param size           Size in bytes of the buffer's storage.
    /// @param is_read_buffer True when the buffer downloads (reads) data from the GPU,
    ///                       false when it uploads (writes) data to the GPU.
    explicit PersistentStagingBuffer(std::size_t size, bool is_read_buffer)
        : is_read_buffer{is_read_buffer} {
        // GL_CLIENT_STORAGE_BIT hints the driver to keep the storage client side,
        // avoiding server-to-client transfers on texture usage.
        constexpr GLenum storage_read =
            GL_CLIENT_STORAGE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_READ_BIT;
        constexpr GLenum storage_write =
            GL_CLIENT_STORAGE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT;
        constexpr GLenum map_read = GL_MAP_PERSISTENT_BIT | GL_MAP_READ_BIT;
        // Writes are flushed explicitly in Unmap; synchronization is handled by
        // the fences queued through QueueFence, hence the unsynchronized map.
        constexpr GLenum map_write = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT |
                                     GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_UNSYNCHRONIZED_BIT;
        const GLenum storage = is_read_buffer ? storage_read : storage_write;
        const GLenum map = is_read_buffer ? map_read : map_write;
        buffer.Create();
        glNamedBufferStorage(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, storage);
        // Persistent mapping: the returned pointer stays valid until destruction.
        pointer = reinterpret_cast<u8*>(
            glMapNamedBufferRange(buffer.handle, 0, static_cast<GLsizeiptr>(size), map));
    }

    ~PersistentStagingBuffer() override {
        // Release a still-pending fence; the buffer object (and its mapping) is
        // released by the OGLBuffer member's destructor.
        if (sync) {
            glDeleteSync(sync);
        }
    }

    u8* GetOpenGLPointer() const override {
        // Operations with a bound OpenGL buffer start with an offset of 0.
        return nullptr;
    }

    u8* Map([[maybe_unused]] std::size_t size) const override {
        // Already persistently mapped; size is irrelevant here.
        return pointer;
    }

    void Unmap(std::size_t size) const override {
        if (!is_read_buffer) {
            // We flush the buffer on write operations (the mapping uses
            // GL_MAP_FLUSH_EXPLICIT_BIT).
            glFlushMappedNamedBufferRange(buffer.handle, 0, size);
        }
    }

    void QueueFence(bool own) override {
        DEBUG_ASSERT(!sync);
        // An owned fence keeps the buffer unavailable until explicitly waited,
        // protecting deferred flush data from being overwritten.
        owned = own;
        sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
    }

    void WaitFence() override {
        DEBUG_ASSERT(sync);
        // GL_TIMEOUT_IGNORED is an effectively unbounded timeout here, so the
        // wait either succeeds or fails outright.
        switch (glClientWaitSync(sync, 0, GL_TIMEOUT_IGNORED)) {
        case GL_ALREADY_SIGNALED:
        case GL_CONDITION_SATISFIED:
            break;
        case GL_TIMEOUT_EXPIRED:
        case GL_WAIT_FAILED:
            UNREACHABLE_MSG("Fence wait failed");
            break;
        }
        // Release the fence and the buffer's ownership.
        Discard();
    }

    void Discard() override {
        DEBUG_ASSERT(sync);
        glDeleteSync(sync);
        sync = nullptr;
        owned = false;
    }

    bool IsAvailable() override {
        // Owned fences must be waited on explicitly; never recycle them here.
        if (owned) {
            return false;
        }
        if (!sync) {
            return true;
        }
        // Non-blocking signaled-status query (glGetSynciv is the intended API
        // for polling, as opposed to glClientWaitSync with a zero timeout).
        GLint status;
        glGetSynciv(sync, GL_SYNC_STATUS, sizeof(GLint), nullptr, &status);
        if (status == GL_UNSIGNALED) {
            return false;
        }
        // The fence has been signaled, we can destroy it
        glDeleteSync(sync);
        sync = nullptr;
        return true;
    }

    void Bind(GLenum target) const override {
        glBindBuffer(target, buffer.handle);
    }

private:
    OGLBuffer buffer;       ///< Owned OpenGL buffer object.
    GLsync sync{};          ///< Pending fence, null when none is queued.
    u8* pointer{};          ///< Persistently mapped base pointer.
    bool is_read_buffer{};  ///< True for download (read) buffers.
    bool owned{};           ///< True while the queued fence is protected from reuse.
};
/// Staging buffer fallback that stores texture data in ordinary heap memory.
/// Used when PBO streaming is disabled; every operation completes immediately,
/// so fences and deferred operations are no-ops.
class CpuStagingBuffer final : public StagingBuffer {
public:
    explicit CpuStagingBuffer(std::size_t size) : storage{std::make_unique<u8[]>(size)} {}

    ~CpuStagingBuffer() override = default;

    u8* GetOpenGLPointer() const override {
        return storage.get();
    }

    u8* Map([[maybe_unused]] std::size_t size) const override {
        return storage.get();
    }

    void Unmap([[maybe_unused]] std::size_t size) const override {}

    void QueueFence(bool own) override {
        // Nothing to synchronize against; CPU operations finish immediately.
    }

    void WaitFence() override {
        // Nothing was queued, so there is nothing to wait on.
    }

    void Discard() override {
        UNREACHABLE_MSG("CpuStagingBuffer doesn't support deferred operations");
    }

    bool IsAvailable() override {
        // Immediate operations make this buffer always reusable.
        return true;
    }

    void Bind(GLenum target) const override {
        // Binding zero makes OpenGL read from/write to client memory pointers.
        glBindBuffer(target, 0);
    }

private:
    std::unique_ptr<u8[]> storage;
};
StagingBufferCache::StagingBufferCache(const Device& device)
: VideoCommon::StagingBufferCache<StagingBuffer>{!device.HasBrokenPBOStreaming()},
device{device} {}
StagingBufferCache::~StagingBufferCache() = default;
std::unique_ptr<StagingBuffer> StagingBufferCache::CreateBuffer(std::size_t size, bool is_flush) {
if (device.HasBrokenPBOStreaming()) {
return std::unique_ptr<StagingBuffer>(new CpuStagingBuffer(size));
} else {
return std::unique_ptr<StagingBuffer>(new PersistentStagingBuffer(size, is_flush));
}
}
} // namespace OpenGL

View File

@@ -0,0 +1,60 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/staging_buffer_cache.h"
namespace OpenGL {
class Device;
class StagingBuffer;
/// OpenGL implementation of the generic staging buffer cache. Creates CPU
/// backed buffers on devices with broken PBO streaming and persistently mapped
/// OpenGL buffers everywhere else.
class StagingBufferCache final : public VideoCommon::StagingBufferCache<StagingBuffer> {
public:
    explicit StagingBufferCache(const Device& device);
    ~StagingBufferCache() override;

protected:
    /// Creates a staging buffer of at least the given size.
    /// @param is_flush True when the buffer is used to download (flush) data.
    std::unique_ptr<StagingBuffer> CreateBuffer(std::size_t size, bool is_flush) override;

private:
    const Device& device;
};
/// Interface of a buffer used to stage texture transfers between the guest and
/// the host GPU.
class StagingBuffer : public NonCopyable {
public:
    virtual ~StagingBuffer() = default;

    /// Returns the base pointer passed to an OpenGL function.
    [[nodiscard]] virtual u8* GetOpenGLPointer() const = 0;

    /// Maps the staging buffer.
    [[nodiscard]] virtual u8* Map(std::size_t size) const = 0;

    /// Unmaps the staging buffer.
    virtual void Unmap(std::size_t size) const = 0;

    /// Inserts a fence in the OpenGL pipeline.
    /// @param own Protects the fence from being used before it's waited, intended for flushes.
    virtual void QueueFence(bool own) = 0;

    /// Waits for a fence and releases the ownership.
    virtual void WaitFence() = 0;

    /// Discards the deferred operation and its bound fence. A fence must be queued.
    virtual void Discard() = 0;

    /// Returns true when the fence is available.
    [[nodiscard]] virtual bool IsAvailable() = 0;

    /// Binds the staging buffer handle to an OpenGL target.
    virtual void Bind(GLenum target) const = 0;
};
} // namespace OpenGL

View File

@@ -10,9 +10,11 @@
#include "core/core.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_staging_buffer.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/staging_buffer_cache.h"
#include "video_core/texture_cache/surface_base.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/textures/convert.h"
@@ -234,8 +236,9 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
} // Anonymous namespace
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params)
: VideoCommon::SurfaceBase<View>(gpu_addr, params) {
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
std::vector<u8>& temporary_buffer)
: VideoCommon::SurfaceBase<View, StagingBuffer>{gpu_addr, params, temporary_buffer} {
const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)};
internal_format = tuple.internal_format;
format = tuple.format;
@@ -251,45 +254,52 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param
CachedSurface::~CachedSurface() = default;
void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
void CachedSurface::DownloadTexture(StagingBuffer& buffer) {
MICROPROFILE_SCOPE(OpenGL_Texture_Download);
SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
buffer.Bind(GL_PIXEL_PACK_BUFFER);
const auto pointer_base = buffer.GetOpenGLPointer();
for (u32 level = 0; level < params.emulated_levels; ++level) {
glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
const auto mip_offset = pointer_base + params.GetHostMipmapLevelOffset(level);
if (is_compressed) {
glGetCompressedTextureImage(texture.handle, level,
static_cast<GLsizei>(params.GetHostMipmapSize(level)),
staging_buffer.data() + mip_offset);
mip_offset);
} else {
glGetTextureImage(texture.handle, level, format, type,
static_cast<GLsizei>(params.GetHostMipmapSize(level)),
staging_buffer.data() + mip_offset);
static_cast<GLsizei>(params.GetHostMipmapSize(level)), mip_offset);
}
}
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
// According to Cemu glGetTextureImage and friends do not flush, resulting in a softlock if we
// wait for a fence. To fix this we have to explicitly flush and then queue a fence.
glFlush();
buffer.QueueFence(true);
}
void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
void CachedSurface::UploadTexture(StagingBuffer& buffer) {
MICROPROFILE_SCOPE(OpenGL_Texture_Upload);
SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); });
buffer.Bind(GL_PIXEL_UNPACK_BUFFER);
const auto pointer = buffer.GetOpenGLPointer();
for (u32 level = 0; level < params.emulated_levels; ++level) {
UploadTextureMipmap(level, staging_buffer);
UploadTextureMipmap(level, pointer);
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
buffer.QueueFence(false);
}
void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
void CachedSurface::UploadTextureMipmap(u32 level, const u8* opengl_pointer) {
glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
auto compression_type = params.GetCompressionType();
const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
? params.GetConvertedMipmapOffset(level)
: params.GetHostMipmapLevelOffset(level);
const u8* buffer{staging_buffer.data() + mip_offset};
const auto compression_type = params.GetCompressionType();
const u8* mip_offset = opengl_pointer + (compression_type == SurfaceCompression::Converted
? params.GetConvertedMipmapOffset(level)
: params.GetHostMipmapLevelOffset(level));
if (is_compressed) {
const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
switch (params.target) {
@@ -297,7 +307,7 @@ void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& stagin
glCompressedTextureSubImage2D(texture.handle, level, 0, 0,
static_cast<GLsizei>(params.GetMipWidth(level)),
static_cast<GLsizei>(params.GetMipHeight(level)),
internal_format, image_size, buffer);
internal_format, image_size, mip_offset);
break;
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
@@ -306,7 +316,7 @@ void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& stagin
static_cast<GLsizei>(params.GetMipWidth(level)),
static_cast<GLsizei>(params.GetMipHeight(level)),
static_cast<GLsizei>(params.GetMipDepth(level)),
internal_format, image_size, buffer);
internal_format, image_size, mip_offset);
break;
case SurfaceTarget::TextureCubemap: {
const std::size_t layer_size{params.GetHostLayerSize(level)};
@@ -315,8 +325,8 @@ void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& stagin
static_cast<GLsizei>(params.GetMipWidth(level)),
static_cast<GLsizei>(params.GetMipHeight(level)), 1,
internal_format, static_cast<GLsizei>(layer_size),
buffer);
buffer += layer_size;
mip_offset);
mip_offset += layer_size;
}
break;
}
@@ -327,17 +337,17 @@ void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& stagin
switch (params.target) {
case SurfaceTarget::Texture1D:
glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type,
buffer);
mip_offset);
break;
case SurfaceTarget::TextureBuffer:
ASSERT(level == 0);
glNamedBufferSubData(texture_buffer.handle, 0,
params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer);
params.GetMipWidth(level) * params.GetBytesPerPixel(), mip_offset);
break;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2D:
glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level),
params.GetMipHeight(level), format, type, buffer);
params.GetMipHeight(level), format, type, mip_offset);
break;
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
@@ -345,16 +355,18 @@ void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& stagin
glTextureSubImage3D(
texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)),
static_cast<GLsizei>(params.GetMipHeight(level)),
static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer);
static_cast<GLsizei>(params.GetMipDepth(level)), format, type, mip_offset);
break;
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubemap: {
const std::size_t layer_size = params.GetHostLayerSize(level);
for (std::size_t face = 0; face < params.depth; ++face) {
glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
params.GetMipWidth(level), params.GetMipHeight(level), 1,
format, type, buffer);
buffer += params.GetHostLayerSize(level);
format, type, mip_offset);
mip_offset += layer_size;
}
break;
}
default:
UNREACHABLE();
}
@@ -452,7 +464,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
VideoCore::RasterizerInterface& rasterizer,
const Device& device)
: TextureCacheBase{system, rasterizer} {
: TextureCacheBase{system, rasterizer, std::make_unique<StagingBufferCache>(device)} {
src_framebuffer.Create();
dst_framebuffer.Create();
}
@@ -460,7 +472,7 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
TextureCacheOpenGL::~TextureCacheOpenGL() = default;
Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
return std::make_shared<CachedSurface>(gpu_addr, params);
return std::make_shared<CachedSurface>(gpu_addr, params, temporary_buffer);
}
void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
@@ -568,7 +580,6 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type,
static_cast<GLsizei>(source_size), nullptr);
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
@@ -604,7 +615,6 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
UNREACHABLE();
}
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glTextureBarrier();
}

View File

@@ -17,6 +17,7 @@
#include "video_core/engines/shader_bytecode.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_staging_buffer.h"
#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
@@ -26,21 +27,23 @@ using VideoCommon::ViewParams;
class CachedSurfaceView;
class CachedSurface;
class StagingBuffer;
class TextureCacheOpenGL;
using Surface = std::shared_ptr<CachedSurface>;
using View = std::shared_ptr<CachedSurfaceView>;
using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
using TextureCacheBase = VideoCommon::TextureCache<Surface, View, StagingBuffer>;
class CachedSurface final : public VideoCommon::SurfaceBase<View> {
class CachedSurface final : public VideoCommon::SurfaceBase<View, StagingBuffer> {
friend CachedSurfaceView;
public:
explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params);
explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
std::vector<u8>& temporary_buffer);
~CachedSurface();
void UploadTexture(const std::vector<u8>& staging_buffer) override;
void DownloadTexture(std::vector<u8>& staging_buffer) override;
void UploadTexture(StagingBuffer& buffer) override;
void DownloadTexture(StagingBuffer& buffer) override;
GLenum GetTarget() const {
return target;
@@ -57,7 +60,7 @@ protected:
View CreateViewInner(const ViewParams& view_key, bool is_proxy);
private:
void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);
void UploadTextureMipmap(u32 level, const u8* opengl_pointer);
GLenum internal_format{};
GLenum format{};
@@ -138,6 +141,7 @@ private:
OGLFramebuffer src_framebuffer;
OGLFramebuffer dst_framebuffer;
std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
std::vector<u8> temporary_buffer;
};
} // namespace OpenGL

View File

@@ -172,6 +172,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
gl_framebuffer_data.data(), host_ptr);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
// Update existing texture

View File

@@ -0,0 +1,58 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <cstddef>
#include <memory>
#include <unordered_map>
#include <vector>
#include "common/bit_util.h"
#include "common/common_types.h"
namespace VideoCommon {
template <typename StagingBufferType>
class StagingBufferCache {
using Cache = std::unordered_map<u32, std::vector<std::unique_ptr<StagingBufferType>>>;
public:
explicit StagingBufferCache(bool can_flush_aot) : can_flush_aot{can_flush_aot} {}
virtual ~StagingBufferCache() = default;
StagingBufferType& GetWriteBuffer(std::size_t size) {
return GetBuffer(size, false);
}
StagingBufferType& GetReadBuffer(std::size_t size) {
return GetBuffer(size, true);
}
bool CanFlushAheadOfTime() const {
return can_flush_aot;
}
protected:
virtual std::unique_ptr<StagingBufferType> CreateBuffer(std::size_t size, bool is_flush) = 0;
private:
StagingBufferType& GetBuffer(std::size_t size, bool is_flush) {
const u32 ceil = Common::Log2Ceil64(size);
auto& buffers = (is_flush ? flush_cache : upload_cache)[ceil];
const auto it = std::find_if(buffers.begin(), buffers.end(),
[](auto& buffer) { return buffer->IsAvailable(); });
if (it != buffers.end()) {
return **it;
}
return *buffers.emplace_back(CreateBuffer(1ULL << ceil, is_flush));
}
bool can_flush_aot{};
Cache upload_cache;
Cache flush_cache;
};
} // namespace VideoCommon

View File

@@ -19,12 +19,10 @@ using Tegra::Texture::ConvertFromGuestToHost;
using VideoCore::MortonSwizzleMode;
using VideoCore::Surface::SurfaceCompression;
StagingCache::StagingCache() = default;
StagingCache::~StagingCache() = default;
SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params)
: params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr},
SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
std::vector<u8>& temporary_buffer)
: params{params}, temporary_buffer{temporary_buffer},
host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr},
mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) {
std::size_t offset = 0;
for (u32 level = 0; level < params.num_levels; ++level) {
@@ -45,6 +43,8 @@ SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params)
}
}
SurfaceBaseImpl::~SurfaceBaseImpl() = default;
MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const {
const u32 src_bpp{params.GetBytesPerPixel()};
const u32 dst_bpp{rhs.GetBytesPerPixel()};
@@ -179,10 +179,8 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf
}
}
void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
StagingCache& staging_cache) {
void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, u8* staging_buffer) {
MICROPROFILE_SCOPE(GPU_Load_Texture);
auto& staging_buffer = staging_cache.GetBuffer(0);
u8* host_ptr;
is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size);
@@ -195,9 +193,8 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
}
} else {
// Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1);
tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data();
temporary_buffer.resize(guest_memory_size);
host_ptr = temporary_buffer.data();
memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}
@@ -207,7 +204,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
for (u32 level = 0; level < params.num_levels; ++level) {
const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
staging_buffer.data() + host_offset, level);
staging_buffer + host_offset, level);
}
} else {
ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
@@ -218,10 +215,10 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
const u32 height{(params.height + block_height - 1) / block_height};
const u32 copy_size{width * bpp};
if (params.pitch == copy_size) {
std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes());
std::memcpy(staging_buffer, host_ptr, params.GetHostSizeInBytes());
} else {
const u8* start{host_ptr};
u8* write_to{staging_buffer.data()};
u8* write_to{staging_buffer};
for (u32 h = height; h > 0; --h) {
std::memcpy(write_to, start, copy_size);
start += params.pitch;
@@ -241,18 +238,16 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged
? in_host_offset
: params.GetConvertedMipmapOffset(level);
u8* in_buffer = staging_buffer.data() + in_host_offset;
u8* out_buffer = staging_buffer.data() + out_host_offset;
u8* in_buffer = staging_buffer + in_host_offset;
u8* out_buffer = staging_buffer + out_host_offset;
ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
params.GetMipWidth(level), params.GetMipHeight(level),
params.GetMipDepth(level), true, true);
}
}
void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
StagingCache& staging_cache) {
void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, u8* staging_buffer) {
MICROPROFILE_SCOPE(GPU_Flush_Texture);
auto& staging_buffer = staging_cache.GetBuffer(0);
u8* host_ptr;
// Handle continuouty
@@ -264,9 +259,8 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
}
} else {
// Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1);
tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data();
temporary_buffer.resize(guest_memory_size);
host_ptr = temporary_buffer.data();
}
if (params.is_tiled) {
@@ -274,7 +268,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
for (u32 level = 0; level < params.num_levels; ++level) {
const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
staging_buffer.data() + host_offset, level);
staging_buffer + host_offset, level);
}
} else {
ASSERT(params.target == SurfaceTarget::Texture2D);
@@ -283,10 +277,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
const u32 bpp{params.GetBytesPerPixel()};
const u32 copy_size{params.width * bpp};
if (params.pitch == copy_size) {
std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
std::memcpy(host_ptr, staging_buffer, guest_memory_size);
} else {
u8* start{host_ptr};
const u8* read_to{staging_buffer.data()};
const u8* read_to{staging_buffer};
for (u32 h = params.height; h > 0; --h) {
std::memcpy(start, read_to, copy_size);
start += params.pitch;

View File

@@ -38,32 +38,11 @@ enum class MatchTopologyResult : u32 {
None = 2,
};
class StagingCache {
public:
explicit StagingCache();
~StagingCache();
std::vector<u8>& GetBuffer(std::size_t index) {
return staging_buffer[index];
}
const std::vector<u8>& GetBuffer(std::size_t index) const {
return staging_buffer[index];
}
void SetSize(std::size_t size) {
staging_buffer.resize(size);
}
private:
std::vector<std::vector<u8>> staging_buffer;
};
class SurfaceBaseImpl {
public:
void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
void LoadBuffer(Tegra::MemoryManager& memory_manager, u8* staging_buffer);
void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
void FlushBuffer(Tegra::MemoryManager& memory_manager, u8* staging_buffer);
GPUVAddr GetGpuAddr() const {
return gpu_addr;
@@ -161,12 +140,15 @@ public:
}
protected:
explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params);
~SurfaceBaseImpl() = default;
explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
std::vector<u8>& temporary_buffer);
~SurfaceBaseImpl();
virtual void DecorateSurfaceName() = 0;
const SurfaceParams params;
std::vector<u8>& temporary_buffer;
std::size_t layer_size;
std::size_t guest_memory_size;
const std::size_t host_memory_size;
@@ -188,25 +170,40 @@ private:
std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const;
};
template <typename TView>
template <typename TView, typename StagingBufferType>
class SurfaceBase : public SurfaceBaseImpl {
public:
virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;
virtual void UploadTexture(StagingBufferType& buffer) = 0;
virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;
virtual void DownloadTexture(StagingBufferType& buffer) = 0;
void SetFlushBuffer(StagingBufferType* buffer) {
flush_buffer = buffer;
}
StagingBufferType* GetFlushBuffer() const {
return flush_buffer;
}
void MarkAsModified(const bool is_modified_, const u64 tick) {
is_modified = is_modified_ || is_target;
modification_tick = tick;
if (is_modified && flush_buffer) {
// The buffer has been modified while we thought it was no longer being to be used and
// we queued a flush.
flush_buffer->Discard();
flush_buffer = nullptr;
}
}
void MarkAsRenderTarget(const bool is_target, const u32 index) {
this->is_target = is_target;
this->index = index;
void MarkAsRenderTarget(const bool is_target_, const u32 index_) {
is_target = is_target_;
index = index_;
}
void MarkAsPicked(const bool is_picked) {
this->is_picked = is_picked;
void MarkAsPicked(const bool is_picked_) {
is_picked = is_picked_;
}
bool IsModified() const {
@@ -214,7 +211,7 @@ public:
}
bool IsProtected() const {
// Only 3D Slices are to be protected
// Only 3D slices are to be protected
return is_target && params.block_depth > 0;
}
@@ -292,8 +289,9 @@ public:
}
protected:
explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params)
: SurfaceBaseImpl(gpu_addr, params) {}
explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params,
std::vector<u8>& temporary_buffer)
: SurfaceBaseImpl{gpu_addr, params, temporary_buffer} {}
~SurfaceBase() = default;
@@ -320,6 +318,8 @@ private:
bool is_picked{};
u32 index{NO_RT};
u64 modification_tick{};
StagingBufferType* flush_buffer{};
};
} // namespace VideoCommon

View File

@@ -27,6 +27,7 @@
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/staging_buffer_cache.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/copy_params.h"
#include "video_core/texture_cache/surface_base.h"
@@ -48,7 +49,7 @@ using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
template <typename TSurface, typename TView>
template <typename TSurface, typename TView, typename StagingBufferType>
class TextureCache {
using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
using IntervalType = typename IntervalMap::interval_type;
@@ -62,10 +63,10 @@ public:
}
}
/***
/**
* `Guard` guarantees that rendertargets don't unregister themselves if the
* collide. Protection is currently only done on 3D slices.
***/
*/
void GuardRenderTargets(bool new_guard) {
guard_render_targets = new_guard;
}
@@ -132,12 +133,18 @@ public:
regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
if (depth_buffer.target)
if (auto& old_target = depth_buffer.target; old_target != surface_view.first) {
FlushAoT(old_target);
}
if (depth_buffer.target) {
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
}
depth_buffer.target = surface_view.first;
depth_buffer.view = surface_view.second;
if (depth_buffer.target)
if (depth_buffer.target) {
depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
}
return surface_view.second;
}
@@ -166,12 +173,18 @@ public:
auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
preserve_contents, true);
if (render_targets[index].target)
if (auto& old_target = render_targets[index].target; old_target != surface_view.first) {
FlushAoT(old_target);
}
if (render_targets[index].target) {
render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
}
render_targets[index].target = surface_view.first;
render_targets[index].view = surface_view.second;
if (render_targets[index].target)
if (render_targets[index].target) {
render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
}
return surface_view.second;
}
@@ -188,19 +201,25 @@ public:
}
void SetEmptyDepthBuffer() {
if (depth_buffer.target == nullptr) {
auto& target = depth_buffer.target;
if (target == nullptr) {
return;
}
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
FlushAoT(target);
target->MarkAsRenderTarget(false, NO_RT);
depth_buffer.target = nullptr;
depth_buffer.view = nullptr;
}
void SetEmptyColorBuffer(std::size_t index) {
if (render_targets[index].target == nullptr) {
auto& target = render_targets[index].target;
if (target == nullptr) {
return;
}
render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
FlushAoT(target);
target->MarkAsRenderTarget(false, NO_RT);
render_targets[index].target = nullptr;
render_targets[index].view = nullptr;
}
@@ -235,14 +254,15 @@ public:
}
protected:
TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
: system{system}, rasterizer{rasterizer} {
TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
std::unique_ptr<StagingBufferCache<StagingBufferType>> staging_buffer_cache)
: system{system}, rasterizer{rasterizer}, staging_buffer_cache{
std::move(staging_buffer_cache)} {
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
SetEmptyColorBuffer(i);
}
SetEmptyDepthBuffer();
staging_cache.SetSize(2);
const auto make_siblings = [this](PixelFormat a, PixelFormat b) {
siblings_table[static_cast<std::size_t>(a)] = b;
@@ -687,9 +707,13 @@ private:
}
void LoadSurface(const TSurface& surface) {
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
surface->UploadTexture(staging_cache.GetBuffer(0));
const auto host_size = surface->GetHostSizeInBytes();
auto& buffer = staging_buffer_cache->GetWriteBuffer(host_size);
surface->LoadBuffer(system.GPU().MemoryManager(), buffer.Map(host_size));
buffer.Unmap(host_size);
surface->UploadTexture(buffer);
surface->MarkAsModified(false, Tick());
}
@@ -697,9 +721,18 @@ private:
if (!surface->IsModified()) {
return;
}
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
surface->DownloadTexture(staging_cache.GetBuffer(0));
surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache);
const auto host_size = surface->GetHostSizeInBytes();
auto buffer = surface->GetFlushBuffer();
if (!buffer) {
buffer = &staging_buffer_cache->GetReadBuffer(host_size);
surface->DownloadTexture(*buffer);
}
buffer->WaitFence();
surface->SetFlushBuffer(nullptr);
surface->FlushBuffer(system.GPU().MemoryManager(), buffer->Map(host_size));
buffer->Unmap(host_size);
surface->MarkAsModified(false, Tick());
}
@@ -767,6 +800,16 @@ private:
return {};
}
/// Queues an ahead-of-time (asynchronous) download of a surface so that a
/// later flush does not have to stall downloading it synchronously. Only done
/// when the staging buffer implementation supports deferred flushes, the
/// surface exists, it is linear, and it doesn't already have a flush queued.
void FlushAoT(TSurface& surface) {
    // NOTE: the original condition returned early when CanFlushAheadOfTime()
    // was true, inverting the intent: persistent buffers (which support AoT
    // flushing) never got one, while CPU buffers (whose Discard() is
    // unreachable) did.
    if (!staging_buffer_cache->CanFlushAheadOfTime() || !surface || !surface->IsLinear() ||
        surface->GetFlushBuffer()) {
        return;
    }
    auto& buffer = staging_buffer_cache->GetReadBuffer(surface->GetHostSizeInBytes());
    surface->DownloadTexture(buffer);
    surface->SetFlushBuffer(&buffer);
}
constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
return siblings_table[static_cast<std::size_t>(format)];
}
@@ -813,7 +856,7 @@ private:
std::vector<TSurface> sampled_textures;
StagingCache staging_cache;
std::unique_ptr<StagingBufferCache<StagingBufferType>> staging_buffer_cache;
std::recursive_mutex mutex;
};