Compare commits

...

6 Commits

Author SHA1 Message Date
bunnei
77d50eb970 Merge 8bff67e0c1 into c93ea96366 2018-04-18 02:44:50 +00:00
bunnei
8bff67e0c1 gl_shader_decompiler: HACK: Remove some unreachables so games run. 2018-04-17 22:42:07 -04:00
bunnei
c71b78f3bc gl_rasterizer_cache: HACK: Pre-swap raw textures before uploading them.
- Temporary fix for Cave Story, will be removed before merging.
2018-04-17 22:25:30 -04:00
bunnei
24a47f6e18 gl_shader_gen: Support vertical/horizontal viewport flipping. 2018-04-17 22:25:29 -04:00
bunnei
e4d3f578d1 renderer_opengl: Support unswizzled textures. 2018-04-17 22:23:14 -04:00
bunnei
185556025e (jroweboy) textures: Add support for other formats. 2018-04-17 22:21:44 -04:00
13 changed files with 186 additions and 76 deletions

View File

@@ -218,9 +218,6 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry;
Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear,
"TIC versions other than BlockLinear are unimplemented");
ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) ||
(tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap),
"Texture types other than Texture2D are unimplemented");

View File

@@ -319,7 +319,14 @@ public:
}
} rt[NumRenderTargets];
INSERT_PADDING_WORDS(0x80);
f32 viewport_scale_x;
f32 viewport_scale_y;
f32 viewport_scale_z;
u32 viewport_translate_x;
u32 viewport_translate_y;
u32 viewport_translate_z;
INSERT_PADDING_WORDS(0x7A);
struct {
union {
@@ -649,6 +656,12 @@ private:
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_scale_x, 0x280);
ASSERT_REG_POSITION(viewport_scale_y, 0x281);
ASSERT_REG_POSITION(viewport_scale_z, 0x282);
ASSERT_REG_POSITION(viewport_translate_x, 0x283);
ASSERT_REG_POSITION(viewport_translate_y, 0x284);
ASSERT_REG_POSITION(viewport_translate_z, 0x285);
ASSERT_REG_POSITION(viewport, 0x300);
ASSERT_REG_POSITION(vertex_buffer, 0x35D);
ASSERT_REG_POSITION(zeta, 0x3F8);

View File

@@ -523,7 +523,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu
src_params.width = std::min(framebuffer.width, pixel_stride);
src_params.height = framebuffer.height;
src_params.stride = pixel_stride;
src_params.is_tiled = false;
src_params.is_tiled = true;
src_params.pixel_format =
SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
src_params.UpdateParams();

View File

@@ -20,6 +20,7 @@
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/swap.h"
#include "common/vector_math.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
@@ -51,9 +52,14 @@ static constexpr std::array<FormatTuple, 1> fb_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8
}};
static constexpr std::array<FormatTuple, 2> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8
{GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1
static constexpr std::array<FormatTuple, 7> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8
{GL_RGB5_A1, GL_RGB, GL_UNSIGNED_SHORT_5_5_5_1, false, 1}, // RGB5A1
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, false, 1}, // RGB565
{GL_R11F_G11F_B10F, GL_RGB, GL_FLOAT, false, 1}, // R11FG11FB10F
{GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // BC1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // BC2
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // BC3
}};
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
@@ -85,24 +91,8 @@ static u16 GetResolutionScaleFactor() {
}
// Copies a single 8x8 pixel tile between a tile buffer, whose pixels are located with
// VideoCore::MortonInterleave(x, y), and a row-major GL buffer.
//
// morton_to_gl selects the direction: true copies tile_buffer -> gl_buffer, false copies
// gl_buffer -> tile_buffer.
// stride is the GL buffer row pitch in pixels (it is scaled by gl_bytes_per_pixel below).
// tile_buffer points at the first byte of the tile; gl_buffer points at the GL row that
// receives tile row 7 (see the (7 - y) term).
template <bool morton_to_gl, PixelFormat format>
static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) {
// Size of one pixel on the tiled side and on the GL side; the two may differ.
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
for (u32 y = 0; y < 8; ++y) {
for (u32 x = 0; x < 8; ++x) {
u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
// Rows are flipped vertically inside the tile: tile row y lands on GL row (7 - y).
u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel;
// Both directions transfer bytes_per_pixel bytes, i.e. the tiled-side pixel size.
if (morton_to_gl) {
std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel);
} else {
std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel);
}
}
}
}
template <bool morton_to_gl, PixelFormat format>
void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) {
void MortonCopy(u32 stride, u32 height, u32 block_height, u8* gl_buffer, VAddr base, VAddr start,
VAddr end) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
@@ -114,27 +104,67 @@ void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start,
}
template <>
void MortonCopy<true, PixelFormat::DXT1>(u32 stride, u32 height, u8* gl_buffer, VAddr base,
VAddr start, VAddr end) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::DXT1) / 8;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::DXT1);
void MortonCopy<true, PixelFormat::BC1>(u32 stride, u32 height, u32 block_height, u8* gl_buffer,
VAddr base, VAddr start, VAddr end) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::BC1) / 8;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::BC1);
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
// configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
auto data =
Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::DXT1, stride, height);
auto data = Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::BC1, stride,
height, block_height);
std::memcpy(gl_buffer, data.data(), data.size());
}
static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> morton_to_gl_fns = {
MortonCopy<true, PixelFormat::RGBA8>,
MortonCopy<true, PixelFormat::DXT1>,
// Specialization used when loading BC2 textures: the data at guest address `base` is
// unswizzled via Tegra::Texture::UnswizzleTexture and the result is copied into `gl_buffer`.
//
// stride / height    - surface dimensions forwarded to UnswizzleTexture as width / height.
// block_height       - block height forwarded to UnswizzleTexture.
// gl_buffer          - destination; must be able to hold the whole unswizzled surface
//                      (assumed to be sized by the caller - TODO confirm).
// start / end        - unused here; the entire surface is always converted.
template <>
void MortonCopy<true, PixelFormat::BC2>(u32 stride, u32 height, u32 block_height, u8* gl_buffer,
                                        VAddr base, VAddr start, VAddr end) {
    // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
    // configuration for this and perform more generic un/swizzle
    LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
    const auto data = Tegra::Texture::UnswizzleTexture(
        base, Tegra::Texture::TextureFormat::BC2, stride, height, block_height);
    std::memcpy(gl_buffer, data.data(), data.size());
}
// Specialization used when loading BC3 textures: the data at guest address `base` is
// unswizzled via Tegra::Texture::UnswizzleTexture and the result is copied into `gl_buffer`.
//
// stride / height    - surface dimensions forwarded to UnswizzleTexture as width / height.
// block_height       - block height forwarded to UnswizzleTexture.
// gl_buffer          - destination; must be able to hold the whole unswizzled surface
//                      (assumed to be sized by the caller - TODO confirm).
// start / end        - unused here; the entire surface is always converted.
template <>
void MortonCopy<true, PixelFormat::BC3>(u32 stride, u32 height, u32 block_height, u8* gl_buffer,
                                        VAddr base, VAddr start, VAddr end) {
    // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
    // configuration for this and perform more generic un/swizzle
    // NGLOG_CRITICAL(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
    const auto data = Tegra::Texture::UnswizzleTexture(
        base, Tegra::Texture::TextureFormat::BC3, stride, height, block_height);
    std::memcpy(gl_buffer, data.data(), data.size());
}
// Dispatch table of guest-memory -> GL-buffer copy routines, one per pixel format.
// CachedSurface::LoadGLBuffer indexes it with static_cast<size_t>(pixel_format), so the entry
// order must follow the numeric values of PixelFormat (named in the per-entry comments).
// Every entry is called as (stride, height, block_height, gl_buffer, base, start, end).
static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), 7> morton_to_gl_fns =
{
MortonCopy<true, PixelFormat::RGBA8>, // RGBA8
MortonCopy<true, PixelFormat::RGB5A1>, // RGB5A1
MortonCopy<true, PixelFormat::RGB565>, // RGB565
MortonCopy<true, PixelFormat::RG11FB10F>, // RG11FB10F
MortonCopy<true, PixelFormat::BC1>, // BC1
MortonCopy<true, PixelFormat::BC2>, // BC2
MortonCopy<true, PixelFormat::BC3>, // BC3
};
static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> gl_to_morton_fns = {
MortonCopy<false, PixelFormat::RGBA8>,
MortonCopy<false, PixelFormat::DXT1>,
// Dispatch table of GL-buffer -> guest-memory copy routines, one per pixel format.
// CachedSurface::FlushGLBuffer indexes it with static_cast<size_t>(pixel_format), so the entry
// order must follow the numeric values of PixelFormat (named in the per-entry comments).
// Every entry is called as (stride, height, block_height, gl_buffer, base, start, end).
static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), 7> gl_to_morton_fns =
{
MortonCopy<false, PixelFormat::RGBA8>, // RGBA8
MortonCopy<false, PixelFormat::RGB5A1>, // RGB5A1
MortonCopy<false, PixelFormat::RGB565>, // RGB565
MortonCopy<false, PixelFormat::RG11FB10F>, // RG11FB10F
MortonCopy<false, PixelFormat::BC1>, // BC1
MortonCopy<false, PixelFormat::BC2>, // BC2
MortonCopy<false, PixelFormat::BC3>, // BC3
};
// Allocate an uninitialized texture of appropriate size and format for the surface
@@ -483,16 +513,18 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) {
if (!is_tiled) {
ASSERT(type == SurfaceType::Color);
const u32 bytes_per_pixel{GetFormatBpp() >> 3};
std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
bytes_per_pixel * width * height);
// TODO(bunnei): HACK HACK HACK - Remove before checkin!
u32* gl_words = reinterpret_cast<u32*>(&gl_buffer[start_offset]);
for (unsigned index = 0; index < width * height; ++index) {
gl_words[index] = Common::swap32(gl_words[index]);
}
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
// the configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4,
texture_src_data + start_offset, &gl_buffer[start_offset],
true);
} else {
morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
load_start, load_end);
morton_to_gl_fns[static_cast<size_t>(pixel_format)](
stride, height, block_height, &gl_buffer[0], addr, load_start, load_end);
}
}
@@ -536,8 +568,8 @@ void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) {
ASSERT(type == SurfaceType::Color);
std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start);
} else {
gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
flush_start, flush_end);
gl_to_morton_fns[static_cast<size_t>(pixel_format)](
stride, height, block_height, &gl_buffer[0], addr, flush_start, flush_end);
}
}
@@ -1040,6 +1072,7 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
params.width = config.tic.Width();
params.height = config.tic.Height();
params.is_tiled = config.tic.IsTiled();
params.block_height = config.tic.BlockHeight();
params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
params.UpdateParams();

View File

@@ -52,8 +52,17 @@ enum class ScaleMatch {
struct SurfaceParams {
enum class PixelFormat {
// Texture and color buffer formats
RGBA8 = 0,
DXT1 = 1,
RGB5A1 = 1,
RGB565 = 2,
RG11FB10F = 3,
// Compressed Texture formats
BC1 = 4,
BC2 = 5,
BC3 = 6,
Invalid = 255,
};
@@ -70,9 +79,14 @@ struct SurfaceParams {
if (format == PixelFormat::Invalid)
return 0;
constexpr std::array<unsigned int, 2> bpp_table = {
32, // RGBA8
64, // DXT1
constexpr std::array<unsigned int, 7> bpp_table = {
32, // RGBA8
16, // RGB5A1
16, // RGB565
32, // RG11FB10F
64, // BC1
128, // BC2
128, // BC3
};
ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -107,8 +121,12 @@ struct SurfaceParams {
switch (format) {
case Tegra::Texture::TextureFormat::A8R8G8B8:
return PixelFormat::RGBA8;
case Tegra::Texture::TextureFormat::DXT1:
return PixelFormat::DXT1;
case Tegra::Texture::TextureFormat::BC1:
return PixelFormat::BC1;
case Tegra::Texture::TextureFormat::BC2:
return PixelFormat::BC2;
case Tegra::Texture::TextureFormat::BC3:
return PixelFormat::BC3;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -140,7 +158,7 @@ struct SurfaceParams {
return SurfaceType::Color;
}
if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::DXT1)) {
if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::BC3)) {
return SurfaceType::Texture;
}
@@ -213,9 +231,10 @@ struct SurfaceParams {
u32 width = 0;
u32 height = 0;
u32 stride = 0;
u32 block_height = 0;
u16 res_scale = 1;
bool is_tiled = false;
bool is_tiled = true;
PixelFormat pixel_format = PixelFormat::Invalid;
SurfaceType type = SurfaceType::Invalid;
};

View File

@@ -336,7 +336,7 @@ private:
NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {} ({}): {}",
static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
OpCode::GetInfo(instr.opcode).name, instr.hex);
UNREACHABLE();
// UNREACHABLE();
}
}
break;
@@ -433,7 +433,7 @@ private:
NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {} ({}): {}",
static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
OpCode::GetInfo(instr.opcode).name, instr.hex);
UNREACHABLE();
// UNREACHABLE();
}
}

View File

@@ -29,9 +29,15 @@ out gl_PerVertex {
out vec4 position;
layout (std140) uniform vs_config {
vec4 viewport_flip;
};
void main() {
exec_shader();
// Viewport can be flipped, which is unsupported by glViewport
position.xy *= viewport_flip.xy;
gl_Position = position;
}
)";
@@ -52,6 +58,10 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo
in vec4 position;
out vec4 color;
layout (std140) uniform fs_config {
vec4 viewport_flip;
};
uniform sampler2D tex[32];
void main() {

View File

@@ -53,6 +53,10 @@ void SetShaderSamplerBindings(GLuint shader) {
} // namespace Impl
void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {}
// Refreshes the uniform data from the current Maxwell3D register state.
//
// viewport_flip[0] / viewport_flip[1] are set to -1 when the X / Y viewport scale register is
// negative and to +1 otherwise. The remaining two components are left untouched.
// shader_stage is currently not consulted.
void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
    // Reach the accessor through the class; `Core::System().GetInstance()` would first construct
    // (and afterwards destroy) a throwaway Core::System object.
    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
    viewport_flip[0] = regs.viewport_scale_x < 0.0f ? -1.0f : 1.0f;
    viewport_flip[1] = regs.viewport_scale_y < 0.0f ? -1.0f : 1.0f;
}
} // namespace GLShader

View File

@@ -30,10 +30,9 @@ void SetShaderSamplerBindings(GLuint shader);
// Not following that rule will cause problems on some AMD drivers.
// CPU-side copy of the uniform data shared with the generated shaders. The static_asserts that
// follow this struct pin its size, so members must not be added or reordered casually.
struct MaxwellUniformData {
// Fills the members from the current GPU register state.
void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
// Counterpart of `vec4 viewport_flip` in the std140 vs_config / fs_config uniform blocks of the
// generated shaders; the vertex shader multiplies position.xy by its .xy components.
alignas(16) GLvec4 viewport_flip;
};
// static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is
// incorrect");
static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");

View File

@@ -45,18 +45,26 @@ static void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out
// Returns the size in bytes of one addressable element of `format`.
//
// For the block-compressed formats (BC1/BC2/BC3) an element is a whole 4x4 texel tile rather
// than a single pixel. Formats that are not handled are reported through UNIMPLEMENTED_MSG and
// yield 0.
u32 BytesPerPixel(TextureFormat format) {
    switch (format) {
    case TextureFormat::A8R8G8B8:
    case TextureFormat::BF10GF11RF11:
        return 4;
    case TextureFormat::A1B5G5R5:
    case TextureFormat::B5G6R5:
        return 2;
    // In this case a 'pixel' actually refers to a 4x4 tile.
    // (BC1 shares the value 0x24 with the former DXT1 name, so a separate DXT1 label would be a
    // duplicate case.)
    case TextureFormat::BC1:
        return 8;
    case TextureFormat::BC2:
    case TextureFormat::BC3:
        return 16;
    default:
        UNIMPLEMENTED_MSG("Format not implemented");
        // Flowing off the end of a non-void function is undefined behaviour, so always return.
        return 0;
    }
}
std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) {
std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
u32 block_height) {
u8* data = Memory::GetPointer(address);
u32 bytes_per_pixel = BytesPerPixel(format);
@@ -65,11 +73,21 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
switch (format) {
case TextureFormat::DXT1:
// In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values.
case TextureFormat::BC1:
// In the BC1 format, each 4x4 tile is swizzled instead of just individual pixel values.
CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, DefaultBlockHeight);
break;
case TextureFormat::BC2:
// TODO
CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, DefaultBlockHeight);
break;
case TextureFormat::BC3:
// TODO
CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, block_height);
break;
case TextureFormat::A8R8G8B8:
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, DefaultBlockHeight);
@@ -88,8 +106,13 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
// TODO(Subv): Implement.
switch (format) {
case TextureFormat::DXT1:
case TextureFormat::BC1:
case TextureFormat::BC2:
case TextureFormat::BC3:
case TextureFormat::A8R8G8B8:
case TextureFormat::A1B5G5R5:
case TextureFormat::B5G6R5:
case TextureFormat::BF10GF11RF11:
// TODO(Subv): For the time being just forward the same data without any decoding.
rgba_data = texture_data;
break;

View File

@@ -14,7 +14,8 @@ namespace Texture {
/**
* Unswizzles a swizzled texture without changing its format.
*/
std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height);
std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
u32 block_height);
/**
* Decodes an unswizzled texture into a A8R8G8B8 texture.

View File

@@ -14,9 +14,13 @@ namespace Texture {
enum class TextureFormat : u32 {
A8R8G8B8 = 0x8,
DXT1 = 0x24,
DXT23 = 0x25,
DXT45 = 0x26,
A1B5G5R5 = 0x14,
B5G6R5 = 0x15,
BF10GF11RF11 = 0x21,
// Compressed Textures
BC1 = 0x24,
BC2 = 0x25,
BC3 = 0x26,
};
enum class TextureType : u32 {
@@ -70,7 +74,10 @@ struct TICEntry {
BitField<0, 16, u32> address_high;
BitField<21, 3, TICHeaderVersion> header_version;
};
INSERT_PADDING_BYTES(4);
union {
BitField<3, 3, u8> gobs_per_block;
};
INSERT_PADDING_BYTES(3);
union {
BitField<0, 16, u32> width_minus_1;
BitField<23, 4, TextureType> texture_type;
@@ -94,6 +101,10 @@ struct TICEntry {
return header_version == TICHeaderVersion::BlockLinear ||
header_version == TICHeaderVersion::BlockLinearColorKey;
}
// Returns the block height, computed as two raised to the power stored in gobs_per_block.
u32 BlockHeight() const {
    const u32 log2_height = gobs_per_block;
    return 1u << log2_height;
}
};
static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");

View File

@@ -378,8 +378,8 @@ void GraphicsSurfaceWidget::OnUpdate() {
QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32);
VAddr address = gpu.memory_manager->PhysicalToVirtualAddress(surface_address);
auto unswizzled_data =
Tegra::Texture::UnswizzleTexture(address, surface_format, surface_width, surface_height);
auto unswizzled_data = Tegra::Texture::UnswizzleTexture(address, surface_format, surface_width,
surface_height, 16);
auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format,
surface_width, surface_height);