Compare commits

..

10 Commits

Author SHA1 Message Date
raven02
2845348608 Implement ASTC_2D_8X8 (Bayonetta 2) 2018-09-17 01:04:27 +08:00
bunnei
ba480ea2fb Merge pull request #1273 from Subv/ld_sizes
Shaders: Implemented multiple-word loads and stores to and from attribute memory.
2018-09-15 15:27:12 -04:00
bunnei
daee15b058 Merge pull request #1271 from Subv/kepler_engine
GPU: Basic implementation of the Kepler Inline Memory engine (p2mf).
2018-09-15 13:27:07 -04:00
Subv
c878a819d7 Shaders: Implemented multiple-word loads and stores to and from attribute memory.
This seems to be an optimization performed by nouveau.
2018-09-15 11:21:21 -05:00
bunnei
df5a44a40b Merge pull request #1310 from lioncash/kernel-ns
kernel/thread: Include thread-related enums within the kernel namespace
2018-09-13 19:50:47 -04:00
bunnei
fb65076b0f Merge pull request #1309 from lioncash/nested
service: Use nested namespace specifiers where applicable
2018-09-13 19:50:11 -04:00
bunnei
3ef134a092 Merge pull request #1307 from lioncash/pl
services/pl_u: Add missing Korean font to the fallback case for shared fonts
2018-09-13 19:49:39 -04:00
Lioncash
a0e51d8b98 service: Use nested namespace specifiers where applicable
There were a few places where nested namespace specifiers weren't being
used where they could be within the service code. This amends that to
make the namespacing a tiny bit more compact.
2018-09-13 15:52:55 -04:00
Lioncash
ce97d8ef6c services/pl_u: Add missing Korean font to the fallback case for shared fonts
Previously this wasn't using the Korean font at all.
2018-09-12 19:23:51 -04:00
Subv
bb5eb4f20a GPU: Basic implementation of the Kepler Inline Memory engine (p2mf).
This engine writes data from a FIFO register into the configured address.
2018-09-12 13:57:08 -05:00
15 changed files with 234 additions and 27 deletions

View File

@@ -6,8 +6,7 @@
#include "core/hle/service/acc/acc.h"
namespace Service {
namespace Account {
namespace Service::Account {
class ACC_SU final : public Module::Interface {
public:
@@ -16,5 +15,4 @@ public:
~ACC_SU() override;
};
} // namespace Account
} // namespace Service
} // namespace Service::Account

View File

@@ -253,14 +253,16 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
LOG_WARNING(Service_NS,
"Shared Font file missing. Loading open source replacement from memory");
// clang-format off
const std::vector<std::vector<u8>> open_source_shared_fonts_ttf = {
{std::begin(FontChineseSimplified), std::end(FontChineseSimplified)},
{std::begin(FontChineseTraditional), std::end(FontChineseTraditional)},
{std::begin(FontExtendedChineseSimplified),
std::end(FontExtendedChineseSimplified)},
{std::begin(FontExtendedChineseSimplified), std::end(FontExtendedChineseSimplified)},
{std::begin(FontKorean), std::end(FontKorean)},
{std::begin(FontNintendoExtended), std::end(FontNintendoExtended)},
{std::begin(FontStandard), std::end(FontStandard)},
};
// clang-format on
for (const std::vector<u8>& font_ttf : open_source_shared_fonts_ttf) {
const FontRegion region{static_cast<u32>(offset + 8),

View File

@@ -9,8 +9,7 @@
#include "core/core.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
namespace Service {
namespace NVFlinger {
namespace Service::NVFlinger {
BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) {
auto& kernel = Core::System::GetInstance().Kernel();
@@ -104,5 +103,4 @@ u32 BufferQueue::Query(QueryType type) {
return 0;
}
} // namespace NVFlinger
} // namespace Service
} // namespace Service::NVFlinger

View File

@@ -15,8 +15,7 @@ namespace CoreTiming {
struct EventType;
}
namespace Service {
namespace NVFlinger {
namespace Service::NVFlinger {
struct IGBPBuffer {
u32_le magic;
@@ -98,5 +97,4 @@ private:
Kernel::SharedPtr<Kernel::Event> buffer_wait_event;
};
} // namespace NVFlinger
} // namespace Service
} // namespace Service::NVFlinger

View File

@@ -5,6 +5,8 @@ add_library(video_core STATIC
debug_utils/debug_utils.h
engines/fermi_2d.cpp
engines/fermi_2d.h
engines/kepler_memory.cpp
engines/kepler_memory.h
engines/maxwell_3d.cpp
engines/maxwell_3d.h
engines/maxwell_compute.cpp

View File

@@ -14,6 +14,7 @@
#include "core/tracer/recorder.h"
#include "video_core/command_processor.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_compute.h"
#include "video_core/engines/maxwell_dma.h"
@@ -69,6 +70,9 @@ void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
case EngineID::MAXWELL_DMA_COPY_A:
maxwell_dma->WriteReg(method, value);
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
kepler_memory->WriteReg(method, value);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine");
}

View File

@@ -0,0 +1,45 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "core/memory.h"
#include "video_core/engines/kepler_memory.h"
namespace Tegra::Engines {
KeplerMemory::KeplerMemory(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
KeplerMemory::~KeplerMemory() = default;
void KeplerMemory::WriteReg(u32 method, u32 value) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid KeplerMemory register, increase the size of the Regs structure");
regs.reg_array[method] = value;
switch (method) {
case KEPLERMEMORY_REG_INDEX(exec): {
state.write_offset = 0;
break;
}
case KEPLERMEMORY_REG_INDEX(data): {
ProcessData(value);
break;
}
}
}
void KeplerMemory::ProcessData(u32 data) {
ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
GPUVAddr address = regs.dest.Address();
VAddr dest_address =
*memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
Memory::Write32(dest_address, data);
state.write_offset++;
}
} // namespace Tegra::Engines

View File

@@ -0,0 +1,90 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/memory_manager.h"
namespace Tegra::Engines {
#define KEPLERMEMORY_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
class KeplerMemory final {
public:
KeplerMemory(MemoryManager& memory_manager);
~KeplerMemory();
/// Write the value to the register identified by method.
void WriteReg(u32 method, u32 value);
struct Regs {
static constexpr size_t NUM_REGS = 0x7F;
union {
struct {
INSERT_PADDING_WORDS(0x60);
u32 line_length_in;
u32 line_count;
struct {
u32 address_high;
u32 address_low;
u32 pitch;
u32 block_dimensions;
u32 width;
u32 height;
u32 depth;
u32 z;
u32 x;
u32 y;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} dest;
struct {
union {
BitField<0, 1, u32> linear;
};
} exec;
u32 data;
INSERT_PADDING_WORDS(0x11);
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
struct {
u32 write_offset = 0;
} state{};
private:
MemoryManager& memory_manager;
void ProcessData(u32 data);
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(line_length_in, 0x60);
ASSERT_REG_POSITION(line_count, 0x61);
ASSERT_REG_POSITION(dest, 0x62);
ASSERT_REG_POSITION(exec, 0x6C);
ASSERT_REG_POSITION(data, 0x6D);
#undef ASSERT_REG_POSITION
} // namespace Tegra::Engines

View File

@@ -67,6 +67,13 @@ private:
u64 value{};
};
enum class AttributeSize : u64 {
Word = 0,
DoubleWord = 1,
TripleWord = 2,
QuadWord = 3,
};
union Attribute {
Attribute() = default;
@@ -87,9 +94,10 @@ union Attribute {
};
union {
BitField<20, 10, u64> immediate;
BitField<22, 2, u64> element;
BitField<24, 6, Index> index;
BitField<47, 3, u64> size;
BitField<47, 3, AttributeSize> size;
} fmt20;
union {

View File

@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_compute.h"
#include "video_core/engines/maxwell_dma.h"
@@ -27,6 +28,7 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(*memory_manager);
}
GPU::~GPU() = default;

View File

@@ -102,6 +102,7 @@ class Fermi2D;
class Maxwell3D;
class MaxwellCompute;
class MaxwellDMA;
class KeplerMemory;
} // namespace Engines
enum class EngineID {
@@ -146,6 +147,8 @@ private:
std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
/// DMA engine
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
};
} // namespace Tegra

View File

@@ -167,6 +167,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
// Depth formats
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
@@ -213,6 +214,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
static bool IsPixelFormatASTC(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::ASTC_2D_8X8:
return true;
default:
return false;
@@ -223,6 +225,8 @@ static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
return {4, 4};
case PixelFormat::ASTC_2D_8X8:
return {8, 8};
default:
LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
UNREACHABLE();
@@ -327,6 +331,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
MortonCopy<true, PixelFormat::RG8S>,
MortonCopy<true, PixelFormat::RG32UI>,
MortonCopy<true, PixelFormat::R32UI>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
MortonCopy<true, PixelFormat::Z32F>,
MortonCopy<true, PixelFormat::Z16>,
MortonCopy<true, PixelFormat::Z24S8>,
@@ -386,6 +391,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
MortonCopy<false, PixelFormat::RG8S>,
MortonCopy<false, PixelFormat::RG32UI>,
MortonCopy<false, PixelFormat::R32UI>,
nullptr,
MortonCopy<false, PixelFormat::Z32F>,
MortonCopy<false, PixelFormat::Z16>,
MortonCopy<false, PixelFormat::Z24S8>,
@@ -544,7 +550,8 @@ static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
u32 width, u32 height) {
switch (pixel_format) {
case PixelFormat::ASTC_2D_4X4: {
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::ASTC_2D_8X8: {
// Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
u32 block_width{};
u32 block_height{};

View File

@@ -70,19 +70,20 @@ struct SurfaceParams {
RG8S = 42,
RG32UI = 43,
R32UI = 44,
ASTC_2D_8X8 = 45,
MaxColorFormat,
// Depth formats
Z32F = 45,
Z16 = 46,
Z32F = 46,
Z16 = 47,
MaxDepthFormat,
// DepthStencil formats
Z24S8 = 47,
S8Z24 = 48,
Z32FS8 = 49,
Z24S8 = 48,
S8Z24 = 49,
Z32FS8 = 50,
MaxDepthStencilFormat,
@@ -192,6 +193,7 @@ struct SurfaceParams {
1, // RG8S
1, // RG32UI
1, // R32UI
4, // ASTC_2D_8X8
1, // Z32F
1, // Z16
1, // Z24S8
@@ -253,6 +255,7 @@ struct SurfaceParams {
16, // RG8S
64, // RG32UI
32, // R32UI
16, // ASTC_2D_8X8
32, // Z32F
16, // Z16
32, // Z24S8
@@ -522,6 +525,8 @@ struct SurfaceParams {
return PixelFormat::BC6H_SF16;
case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
return PixelFormat::ASTC_2D_4X4;
case Tegra::Texture::TextureFormat::ASTC_2D_8X8:
return PixelFormat::ASTC_2D_8X8;
case Tegra::Texture::TextureFormat::R16_G16:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:

View File

@@ -1772,13 +1772,34 @@ private:
case OpCode::Type::Memory: {
switch (opcode->GetId()) {
case OpCode::Id::LD_A: {
ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
// Note: Shouldn't this be interp mode flat? As in no interpolation made.
ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
"Indirect attribute loads are not supported");
ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
"Unaligned attribute loads are not supported");
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
Tegra::Shader::IpaSampleMode::Default};
regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element,
instr.attribute.fmt20.index, input_mode);
u32 next_element = instr.attribute.fmt20.element;
u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
const auto LoadNextElement = [&](u32 reg_offset) {
regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
static_cast<Attribute::Index>(next_index),
input_mode);
// Load the next attribute element into the following register. If the element
// to load goes beyond the vec4 size, load the first element of the next
// attribute.
next_element = (next_element + 1) % 4;
next_index = next_index + (next_element == 0 ? 1 : 0);
};
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
LoadNextElement(reg_offset);
}
break;
}
case OpCode::Id::LD_C: {
@@ -1820,9 +1841,31 @@ private:
break;
}
case OpCode::Id::ST_A: {
ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index,
instr.attribute.fmt20.element, instr.gpr0);
ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
"Indirect attribute loads are not supported");
ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
"Unaligned attribute loads are not supported");
u32 next_element = instr.attribute.fmt20.element;
u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
const auto StoreNextElement = [&](u32 reg_offset) {
regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
next_element,
instr.gpr0.Value() + reg_offset);
// Load the next attribute element into the following register. If the element
// to load goes beyond the vec4 size, load the first element of the next
// attribute.
next_element = (next_element + 1) % 4;
next_index = next_index + (next_element == 0 ? 1 : 0);
};
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
StoreNextElement(reg_offset);
}
break;
}
case OpCode::Id::TEX: {

View File

@@ -63,6 +63,7 @@ u32 BytesPerPixel(TextureFormat format) {
case TextureFormat::R32_G32_B32:
return 12;
case TextureFormat::ASTC_2D_4X4:
case TextureFormat::ASTC_2D_8X8:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::BF10GF11RF11:
@@ -111,6 +112,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
case TextureFormat::BC6H_UF16:
case TextureFormat::BC6H_SF16:
case TextureFormat::ASTC_2D_4X4:
case TextureFormat::ASTC_2D_8X8:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::A1B5G5R5: