Compare commits

...

52 Commits

Author SHA1 Message Date
Melissa Goad
a9571a383d fixup 2018-04-19 04:01:43 -05:00
Melissa Goad
95691e1a8d Add RGB10_A2 texture formats 2018-04-19 03:29:29 -05:00
bunnei
60e6e8953e Merge pull request #351 from Subv/tex_formats
GPU: Implemented the B5G6R5 format.
2018-04-18 20:20:51 -04:00
Subv
2985056340 GPU: Implemented the B5G6R5 format. 2018-04-18 18:16:45 -05:00
bunnei
ce4f159b1c gl_shader_gen: Support vertical/horizontal viewport flipping. (#347)
* gl_shader_gen: Support vertical/horizontal viewport flipping.

* fixup! gl_shader_gen: Support vertical/horizontal viewport flipping.
2018-04-18 16:42:40 -04:00
bunnei
6a999cf800 Merge pull request #350 from Subv/tex_components
GPU: Fixed the incorrect component order in ABGR8 textures.
2018-04-18 15:36:00 -04:00
Subv
43d98ca8fe GLCache: Added boilerplate code to make supporting configurable texture component types.
For now only the UNORM type is supported.
2018-04-18 14:17:28 -05:00
Subv
5b3fab6766 GLCache: Unify texture and framebuffer formats when converting to OpenGL. 2018-04-18 14:17:28 -05:00
Subv
b2c1672e10 GPU: Texture format 8 and framebuffer format 0xD5 are actually ABGR8. 2018-04-18 14:17:27 -05:00
bunnei
d3f9ea90e7 Merge pull request #349 from Subv/texturing
GPU: Support non-tiled textures and configurable block height.
2018-04-18 14:46:10 -04:00
Subv
48d4efbd69 GPU: Pitch textures are now supported, don't assert when encountering them. 2018-04-18 12:52:53 -05:00
Subv
a3e82e8e1f GLCache: Take into account the texture's block height when caching and unswizzling. 2018-04-18 12:52:53 -05:00
Subv
ac09b5a2e9 GLCache: Added a function to convert cached PixelFormats back to texture formats.
TODO: The way we handle cached formats must change, framebuffer and texture formats are too different to keep them in the same place.
2018-04-18 12:52:52 -05:00
Subv
6b63aaa5b4 GPU: Allow using a configurable block height when unswizzling textures. 2018-04-18 12:52:51 -05:00
Subv
db5f2bfa7e GPU/TIC: Added the pitch and block height fields to the TIC structure. 2018-04-18 11:38:39 -05:00
bunnei
c93ea96366 Merge pull request #346 from bunnei/misc-gpu-improvements
Misc gpu improvements
2018-04-17 22:17:07 -04:00
bunnei
71b4a3b9f6 Merge pull request #344 from bunnei/shader-decompiler-p2
Shader decompiler changes part 2
2018-04-17 22:10:53 -04:00
bunnei
9dc0d13ba5 Merge pull request #345 from bunnei/blending
renderer_opengl: Implement BlendEquation and BlendFunc.
2018-04-17 21:45:36 -04:00
bunnei
7222d9a4c3 gl_rasterizer_cache: Add missing LOG statements. 2018-04-17 21:44:36 -04:00
bunnei
9df8e924fb texture: Add missing formats. 2018-04-17 21:41:36 -04:00
bunnei
3ed8a1cac7 gpu: Add several framebuffer formats to RenderTargetFormat. 2018-04-17 21:40:38 -04:00
bunnei
4a8eb6745e maxwell3d: Allow Texture2DNoMipmap as Texture2D. 2018-04-17 21:39:15 -04:00
bunnei
531c25386e shader_bytecode: Make ctor's constexpr and explicit. 2018-04-17 21:27:07 -04:00
bunnei
174cba5c58 renderer_opengl: Implement BlendEquation and BlendFunc. 2018-04-17 18:11:48 -04:00
bunnei
e59126809c bit_field: Remove is_pod check, add is_trivially_copyable_v. 2018-04-17 18:00:18 -04:00
bunnei
1f6fe062ca gl_shader_decompiler: Fix warnings with MarkAsUsed. 2018-04-17 16:36:44 -04:00
bunnei
ed542a7309 gl_shader_decompiler: Cleanup logging, updating to NGLOG_*. 2018-04-17 16:36:44 -04:00
bunnei
ef2d5ab0c1 gl_shader_decompiler: Implement several MUFU subops and abs_d. 2018-04-17 16:36:43 -04:00
bunnei
59f4ff4659 gl_shader_decompiler: Fix swizzle in GetRegister. 2018-04-17 16:36:42 -04:00
bunnei
5a28dce9eb gl_shader_decompiler: Implement FMUL/FADD/FFMA immediate instructions. 2018-04-17 16:36:42 -04:00
bunnei
8d4899d6ea gl_shader_decompiler: Allow vertex position to be used in fragment shader. 2018-04-17 16:36:40 -04:00
bunnei
95144cc39c gl_shader_decompiler: Implement IPA instruction. 2018-04-17 16:36:39 -04:00
bunnei
8b4443c966 gl_shader_decompiler: Add support for TEXS instruction. 2018-04-17 16:36:38 -04:00
bunnei
5ba71369ac gl_shader_decompiler: Use fragment output color for GPR 0-3. 2018-04-17 15:25:54 -04:00
bunnei
5d529698c9 gl_shader_decompiler: Partially implement MUFU. 2018-04-17 15:25:54 -04:00
bunnei
5b9bcbf438 Merge pull request #341 from shinyquagsire23/pfs-hfs-impl
file_sys: Add HFS/PFS helper component
2018-04-17 14:39:20 -04:00
bunnei
2b082e2710 Merge pull request #343 from Subv/tex_wrap_4
GPU: Implement some wrap modes
2018-04-17 12:25:24 -04:00
shinyquagsire23
de580ccdd5 file_sys: Use NGLOG 2018-04-17 09:55:29 -06:00
Hexagon12
e52a87b98a Various service name fixes - part 2 (rebased) (#322)
* Updated ACC with more service names

* Updated SVC with more service names

* Updated set with more service names

* Updated sockets with more service names

* Updated SPL with more service names

* Updated time with more service names

* Updated vi with more service names
2018-04-17 11:37:43 -04:00
bunnei
0905dc1ff4 Merge pull request #342 from bunnei/indexed-verts
Implement indexed mode rendering
2018-04-17 11:34:22 -04:00
Subv
636ad34707 MaxwellToGL: Implemented tex wrap mode 1 (Wrap, GL_REPEAT). 2018-04-17 10:17:18 -05:00
Subv
7fc516cc1a MaxwellToGL: Added a TODO and partial implementation of maxwell wrap mode 4 (Clamp, GL_CLAMP).
This clamp mode was removed from OpenGL as of 3.1, we can emulate it by using GL_CLAMP_TO_BORDER to get the border color of the texture, and then manually sampling the edge to mix them in the fragment shader.
2018-04-17 10:16:50 -05:00
bunnei
77bdc49343 gl_rendering: Use NGLOG* for changed code. 2018-04-16 21:23:28 -04:00
bunnei
1a1af3fda3 gl_rasterizer: Implement indexed vertex mode. 2018-04-16 21:10:15 -04:00
shinyquagsire23
83aa38b239 file_sys: tweaks 2018-04-16 06:51:59 -06:00
shinyquagsire23
c03795300a file_sys: Add HFS/PFS helper component 2018-04-16 04:36:25 -06:00
bunnei
44e09ba807 Merge pull request #338 from bunnei/unrequire-shared-font
pl_u: Use empty shared font if none is available.
2018-04-15 16:54:36 -04:00
bunnei
d6d7d0989c Merge pull request #337 from Subv/used_buffers
GPU: Don't use explicit binding points when uploading the constbuffers to opengl
2018-04-15 16:30:57 -04:00
bunnei
ac628f139d pl_u: Use empty shared font if none is available.
- Makes games work in lieu of shared_font.bin.
2018-04-15 16:15:34 -04:00
bunnei
b8825fbf10 Merge pull request #335 from bunnei/delete-file
fsp_srv: Implement DeleteFile.
2018-04-15 15:13:02 -04:00
bunnei
b60834ac41 Merge pull request #334 from Subv/used_buffers
GPU: Use the buffer hints from the shader decompiler to upload only the necessary const buffers for each shader stage
2018-04-15 13:17:30 -04:00
bunnei
bddad50dd4 fsp_srv: Implement DeleteFile.
- Used by Binding of Isaac.
2018-04-15 13:15:18 -04:00
41 changed files with 1141 additions and 245 deletions

View File

@@ -192,11 +192,6 @@ private:
static_assert(position < 8 * sizeof(T), "Invalid position");
static_assert(bits <= 8 * sizeof(T), "Invalid number of bits");
static_assert(bits > 0, "Invalid number of bits");
static_assert(std::is_pod<T>::value, "Invalid base type");
static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField");
};
#pragma pack()
#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
static_assert(std::is_trivially_copyable<BitField<0, 1, unsigned>>::value,
"BitField must be trivially copyable");
#endif

View File

@@ -12,6 +12,8 @@ add_library(core STATIC
file_sys/errors.h
file_sys/filesystem.cpp
file_sys/filesystem.h
file_sys/partition_filesystem.cpp
file_sys/partition_filesystem.h
file_sys/path_parser.cpp
file_sys/path_parser.h
file_sys/program_metadata.cpp

View File

@@ -57,10 +57,14 @@ ResultVal<std::unique_ptr<StorageBackend>> Disk_FileSystem::OpenFile(const std::
std::make_unique<Disk_Storage>(std::move(file)));
}
ResultCode Disk_FileSystem::DeleteFile(const Path& path) const {
LOG_WARNING(Service_FS, "(STUBBED) called");
// TODO(bunnei): Use correct error code
return ResultCode(-1);
/**
 * Deletes the file at the given path.
 * @param path Path of the file to delete; it is handed to FileUtil unchanged
 * @return ERROR_PATH_NOT_FOUND if nothing exists at the path, RESULT_SUCCESS otherwise
 */
ResultCode Disk_FileSystem::DeleteFile(const std::string& path) const {
    const bool target_exists = FileUtil::Exists(path);
    if (target_exists) {
        // NOTE(review): the outcome of FileUtil::Delete is not inspected, so success is
        // reported whenever the path existed - confirm whether a failure code is wanted.
        FileUtil::Delete(path);
        return RESULT_SUCCESS;
    }

    return ERROR_PATH_NOT_FOUND;
}
ResultCode Disk_FileSystem::RenameFile(const Path& src_path, const Path& dest_path) const {

View File

@@ -25,7 +25,7 @@ public:
ResultVal<std::unique_ptr<StorageBackend>> OpenFile(const std::string& path,
Mode mode) const override;
ResultCode DeleteFile(const Path& path) const override;
ResultCode DeleteFile(const std::string& path) const override;
ResultCode RenameFile(const Path& src_path, const Path& dest_path) const override;
ResultCode DeleteDirectory(const Path& path) const override;
ResultCode DeleteDirectoryRecursively(const Path& path) const override;

View File

@@ -97,7 +97,7 @@ public:
* @param path Path relative to the archive
* @return Result of the operation
*/
virtual ResultCode DeleteFile(const Path& path) const = 0;
virtual ResultCode DeleteFile(const std::string& path) const = 0;
/**
* Create a directory specified by its path

View File

@@ -0,0 +1,125 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cinttypes>
#include <cstring>
#include <utility>
#include <vector>
#include "common/file_util.h"
#include "common/logging/log.h"
#include "core/file_sys/partition_filesystem.h"
#include "core/loader/loader.h"
namespace FileSys {
/**
 * Loads the PFS/HFS metadata (header, entry table and string table) from a file on disk.
 * Only the metadata is read, never the file contents that follow it. The buffer that is
 * parsed starts at `offset`, so the offsets stored afterwards are relative to the start of
 * the partition rather than to the start of the file.
 * @param file_path Path of the file that contains the partition filesystem
 * @param offset Byte offset inside the file at which the PFS/HFS header starts
 * @return ResultStatus::Success if the metadata was read and parsed, ResultStatus::Error if
 *         the file cannot be opened, is too small, or a read fails
 */
Loader::ResultStatus PartitionFilesystem::Load(const std::string& file_path, size_t offset) {
    FileUtil::IOFile file(file_path, "rb");
    if (!file.IsOpen())
        return Loader::ResultStatus::Error;

    // The header has to fit between the requested offset and the end of the file
    const u64 file_size = file.GetSize();
    if (offset > file_size || file_size - offset < sizeof(Header))
        return Loader::ResultStatus::Error;
    const u64 available_size = file_size - offset;

    // For cartridges, HFSs can get very large, so we need to calculate the size up to
    // the actual content itself instead of just blindly reading in the entire file.
    // The header lives at `offset`, so seek there before reading it; reading from the
    // start of the file would size the metadata from unrelated bytes.
    file.Seek(offset, SEEK_SET);
    Header header;
    if (!file.ReadBytes(&header, sizeof(Header)))
        return Loader::ResultStatus::Error;

    const bool header_is_hfs = (memcmp(header.magic.data(), "HFS", 3) == 0);
    const size_t entry_size = header_is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
    const u32 num_entries = header.num_entries;
    const u32 strtab_size = header.strtab_size;

    // Computed in 64 bits so that a hostile entry count cannot wrap the total
    const u64 metadata_size =
        sizeof(Header) + (static_cast<u64>(num_entries) * entry_size) + strtab_size;

    // A header describing more metadata than the file holds is corrupt; bail out before
    // allocating a buffer of that size.
    if (metadata_size > available_size)
        return Loader::ResultStatus::Error;

    // Actually read in now...
    file.Seek(offset, SEEK_SET);
    std::vector<u8> file_data(static_cast<size_t>(metadata_size));

    if (!file.ReadBytes(file_data.data(), file_data.size()))
        return Loader::ResultStatus::Error;

    Loader::ResultStatus result = Load(file_data);
    if (result != Loader::ResultStatus::Success)
        LOG_ERROR(Service_FS, "Failed to load PFS from file %s!", file_path.c_str());

    return result;
}
/**
 * Parses PFS/HFS metadata out of an in-memory buffer.
 * The object is only modified when parsing succeeds; on failure the previously loaded
 * state (if any) is left untouched.
 * @param file_data Buffer that holds the header, entry table and string table
 * @param offset Byte offset inside file_data at which the PFS/HFS header starts
 * @return ResultStatus::Success on success, ResultStatus::Error if the buffer is too small
 *         for the metadata that the header describes or an entry name lies outside the
 *         string table
 */
Loader::ResultStatus PartitionFilesystem::Load(const std::vector<u8>& file_data, size_t offset) {
    // Checked first so that the subtraction below cannot wrap around
    if (offset > file_data.size())
        return Loader::ResultStatus::Error;

    const size_t total_size = file_data.size() - offset;
    if (total_size < sizeof(Header))
        return Loader::ResultStatus::Error;

    Header header;
    memcpy(&header, file_data.data() + offset, sizeof(Header));
    const bool header_is_hfs = (memcmp(header.magic.data(), "HFS", 3) == 0);

    const size_t entry_size = header_is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
    const u32 num_entries = header.num_entries;
    const u32 strtab_size = header.strtab_size;

    // Entry table and string table must both lie inside the buffer. The product is formed
    // in 64 bits so that it cannot wrap.
    const u64 table_size = static_cast<u64>(num_entries) * entry_size;
    if (table_size + strtab_size > total_size - sizeof(Header))
        return Loader::ResultStatus::Error;

    const size_t entries_offset = offset + sizeof(Header);
    const size_t strtab_offset = entries_offset + static_cast<size_t>(table_size);

    // Entries are collected locally and committed at the end, so a failed or repeated load
    // never leaves the entry list out of step with the header.
    std::vector<FileEntry> entries;
    entries.reserve(num_entries);

    // The counter is as wide as num_entries; a narrower one would wrap and never terminate
    for (u32 i = 0; i < num_entries; i++) {
        FileEntry entry;
        memcpy(&entry.fs_entry, file_data.data() + entries_offset + (i * entry_size),
               sizeof(FSEntry));

        const u32 name_offset = entry.fs_entry.strtab_offset;
        if (name_offset >= strtab_size)
            return Loader::ResultStatus::Error;

        // Stop at the terminator or at the end of the string table, whichever comes first,
        // so an unterminated name cannot run past the buffer.
        const char* name_begin =
            reinterpret_cast<const char*>(file_data.data() + strtab_offset + name_offset);
        const char* name_limit = name_begin + (strtab_size - name_offset);
        entry.name.assign(name_begin, std::find(name_begin, name_limit, '\0'));

        entries.push_back(std::move(entry));
    }

    pfs_header = header;
    is_hfs = header_is_hfs;
    pfs_entries = std::move(entries);
    content_offset = strtab_offset + strtab_size;

    return Loader::ResultStatus::Success;
}
/// Returns the entry count stored in the loaded header.
u32 PartitionFilesystem::GetNumEntries() const {
    const u32 entry_count = pfs_header.num_entries;
    return entry_count;
}
/**
 * Returns the offset of an entry's data: the content offset computed by Load() plus the
 * offset stored in the entry.
 * @param index Zero-based index of the entry
 * @return The offset, or 0 if index does not refer to a loaded entry
 */
u64 PartitionFilesystem::GetEntryOffset(int index) const {
    // Valid indices are [0, size); `index == size` is already one past the end
    if (index < 0 || static_cast<size_t>(index) >= pfs_entries.size())
        return 0;

    return content_offset + pfs_entries[index].fs_entry.offset;
}
/**
 * Returns the size stored in an entry.
 * @param index Zero-based index of the entry
 * @return The stored size, or 0 if index does not refer to a loaded entry
 */
u64 PartitionFilesystem::GetEntrySize(int index) const {
    // Valid indices are [0, size); `index == size` is already one past the end
    if (index < 0 || static_cast<size_t>(index) >= pfs_entries.size())
        return 0;

    return pfs_entries[index].fs_entry.size;
}
/**
 * Returns the name of an entry as read from the string table.
 * @param index Zero-based index of the entry
 * @return The entry name, or an empty string if index does not refer to a loaded entry
 */
std::string PartitionFilesystem::GetEntryName(int index) const {
    // Valid indices are [0, size); `index == size` is already one past the end
    if (index < 0 || static_cast<size_t>(index) >= pfs_entries.size())
        return "";

    return pfs_entries[index].name;
}
/**
 * Looks up an entry by name and returns the offset of its data.
 * @param name Name of the entry to search for (exact match)
 * @return Content offset plus the entry's stored offset for the first match, or 0 if no
 *         entry has that name
 */
u64 PartitionFilesystem::GetFileOffset(const std::string& name) const {
    const u32 entry_count = pfs_header.num_entries;

    for (u32 idx = 0; idx != entry_count; ++idx) {
        const FileEntry& candidate = pfs_entries[idx];
        if (candidate.name != name)
            continue;

        return content_offset + candidate.fs_entry.offset;
    }

    return 0;
}
/**
 * Looks up an entry by name and returns its stored size.
 * @param name Name of the entry to search for (exact match)
 * @return Size stored in the first matching entry, or 0 if no entry has that name
 */
u64 PartitionFilesystem::GetFileSize(const std::string& name) const {
    const u32 entry_count = pfs_header.num_entries;

    for (u32 idx = 0; idx != entry_count; ++idx) {
        const FileEntry& candidate = pfs_entries[idx];
        if (candidate.name != name)
            continue;

        return candidate.fs_entry.size;
    }

    return 0;
}
void PartitionFilesystem::Print() const {
NGLOG_DEBUG(Service_FS, "Magic: {:.4}", pfs_header.magic.data());
NGLOG_DEBUG(Service_FS, "Files: {}", pfs_header.num_entries);
for (u32 i = 0; i < pfs_header.num_entries; i++) {
NGLOG_DEBUG(Service_FS, " > File {}: {} (0x{:X} bytes, at 0x{:X})", i,
pfs_entries[i].name.c_str(), pfs_entries[i].fs_entry.size,
GetFileOffset(pfs_entries[i].name));
}
}
} // namespace FileSys

View File

@@ -0,0 +1,87 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <string>
#include <vector>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/swap.h"
// Forward declaration only: this header names Loader::ResultStatus as a return type, and
// the source file includes core/loader/loader.h for the enumerators it uses.
namespace Loader {
enum class ResultStatus;
}
namespace FileSys {
/**
 * Helper which implements an interface to parse PFS/HFS filesystems.
 * Data can either be loaded from a file path or data with an offset into it.
 *
 * All members are value-initialized, so an instance that has not been loaded yet holds
 * zeroed header fields and an empty entry list instead of indeterminate values.
 */
class PartitionFilesystem {
public:
    /// Loads the filesystem metadata from the file at file_path, starting at byte `offset`.
    Loader::ResultStatus Load(const std::string& file_path, size_t offset = 0);
    /// Loads the filesystem metadata from file_data, starting at byte `offset`.
    Loader::ResultStatus Load(const std::vector<u8>& file_data, size_t offset = 0);

    /// Number of entries as stored in the header.
    u32 GetNumEntries() const;
    /// Offset of the data belonging to the entry at `index`.
    u64 GetEntryOffset(int index) const;
    /// Size stored in the entry at `index`.
    u64 GetEntrySize(int index) const;
    /// Name of the entry at `index`.
    std::string GetEntryName(int index) const;
    /// Offset of the data belonging to the entry called `name`.
    u64 GetFileOffset(const std::string& name) const;
    /// Size stored in the entry called `name`.
    u64 GetFileSize(const std::string& name) const;

    /// Writes a summary of the loaded filesystem to the debug log.
    void Print() const;

private:
    /// On-disk header, 0x10 bytes.
    struct Header {
        std::array<char, 4> magic;
        u32_le num_entries;
        u32_le strtab_size;
        INSERT_PADDING_BYTES(0x4);
    };

    static_assert(sizeof(Header) == 0x10, "PFS/HFS header structure size is wrong");

// The entry structures are packed to one byte so that their sizes match the asserted
// on-disk sizes (FSEntry would otherwise be padded past 0x14 bytes).
#pragma pack(push, 1)
    /// Fields at the start of both entry flavours.
    struct FSEntry {
        u64_le offset;
        u64_le size;
        u32_le strtab_offset;
    };

    static_assert(sizeof(FSEntry) == 0x14, "FS entry structure size is wrong");

    /// PFS table entry, 0x18 bytes.
    struct PFSEntry {
        FSEntry fs_entry;
        INSERT_PADDING_BYTES(0x4);
    };

    static_assert(sizeof(PFSEntry) == 0x18, "PFS entry structure size is wrong");

    /// HFS table entry, 0x40 bytes.
    struct HFSEntry {
        FSEntry fs_entry;
        u32_le hash_region_size;
        INSERT_PADDING_BYTES(0x8);
        std::array<char, 0x20> hash;
    };

    static_assert(sizeof(HFSEntry) == 0x40, "HFS entry structure size is wrong");
#pragma pack(pop)

    /// Parsed entry: the common on-disk fields plus the name read from the string table.
    struct FileEntry {
        FSEntry fs_entry;
        std::string name;
    };

    Header pfs_header{};       ///< Header of the loaded filesystem, zeroed until loaded
    bool is_hfs = false;       ///< Set by Load() when the magic starts with "HFS"
    size_t content_offset = 0; ///< Offset of the first byte after the string table
    std::vector<FileEntry> pfs_entries;
};
} // namespace FileSys

View File

@@ -20,7 +20,7 @@ ResultVal<std::unique_ptr<StorageBackend>> RomFS_FileSystem::OpenFile(const std:
std::make_unique<RomFS_Storage>(romfs_file, data_offset, data_size));
}
ResultCode RomFS_FileSystem::DeleteFile(const Path& path) const {
ResultCode RomFS_FileSystem::DeleteFile(const std::string& path) const {
LOG_CRITICAL(Service_FS, "Attempted to delete a file from an ROMFS archive (%s).",
GetName().c_str());
// TODO(bunnei): Use correct error code

View File

@@ -31,7 +31,7 @@ public:
ResultVal<std::unique_ptr<StorageBackend>> OpenFile(const std::string& path,
Mode mode) const override;
ResultCode DeleteFile(const Path& path) const override;
ResultCode DeleteFile(const std::string& path) const override;
ResultCode RenameFile(const Path& src_path, const Path& dest_path) const override;
ResultCode DeleteDirectory(const Path& path) const override;
ResultCode DeleteDirectoryRecursively(const Path& path) const override;

View File

@@ -861,14 +861,14 @@ static const FunctionDef SVC_Table[] = {
{0x2B, nullptr, "FlushDataCache"},
{0x2C, nullptr, "MapPhysicalMemory"},
{0x2D, nullptr, "UnmapPhysicalMemory"},
{0x2E, nullptr, "Unknown"},
{0x2E, nullptr, "GetNextThreadInfo"},
{0x2F, nullptr, "GetLastThreadInfo"},
{0x30, nullptr, "GetResourceLimitLimitValue"},
{0x31, nullptr, "GetResourceLimitCurrentValue"},
{0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"},
{0x33, SvcWrap<GetThreadContext>, "GetThreadContext"},
{0x34, nullptr, "Unknown"},
{0x35, nullptr, "Unknown"},
{0x34, nullptr, "WaitForAddress"},
{0x35, nullptr, "SignalToAddress"},
{0x36, nullptr, "Unknown"},
{0x37, nullptr, "Unknown"},
{0x38, nullptr, "Unknown"},
@@ -876,7 +876,7 @@ static const FunctionDef SVC_Table[] = {
{0x3A, nullptr, "Unknown"},
{0x3B, nullptr, "Unknown"},
{0x3C, nullptr, "DumpInfo"},
{0x3D, nullptr, "Unknown"},
{0x3D, nullptr, "DumpInfoNew"},
{0x3E, nullptr, "Unknown"},
{0x3F, nullptr, "Unknown"},
{0x40, nullptr, "CreateSession"},
@@ -887,9 +887,9 @@ static const FunctionDef SVC_Table[] = {
{0x45, nullptr, "CreateEvent"},
{0x46, nullptr, "Unknown"},
{0x47, nullptr, "Unknown"},
{0x48, nullptr, "Unknown"},
{0x49, nullptr, "Unknown"},
{0x4A, nullptr, "Unknown"},
{0x48, nullptr, "AllocateUnsafeMemory"},
{0x49, nullptr, "FreeUnsafeMemory"},
{0x4A, nullptr, "SetUnsafeAllocationLimit"},
{0x4B, nullptr, "CreateJitMemory"},
{0x4C, nullptr, "MapJitMemory"},
{0x4D, nullptr, "SleepSystem"},
@@ -926,7 +926,7 @@ static const FunctionDef SVC_Table[] = {
{0x6C, nullptr, "SetHardwareBreakPoint"},
{0x6D, nullptr, "GetDebugThreadParam"},
{0x6E, nullptr, "Unknown"},
{0x6F, nullptr, "Unknown"},
{0x6F, nullptr, "GetMemoryInfo"},
{0x70, nullptr, "CreatePort"},
{0x71, nullptr, "ManageNamedPort"},
{0x72, nullptr, "ConnectToPort"},

View File

@@ -38,7 +38,10 @@ class IProfile final : public ServiceFramework<IProfile> {
public:
IProfile() : ServiceFramework("IProfile") {
static const FunctionInfo functions[] = {
{0, nullptr, "Get"},
{1, &IProfile::GetBase, "GetBase"},
{10, nullptr, "GetImageSize"},
{11, nullptr, "LoadImage"},
};
RegisterHandlers(functions);
}
@@ -59,6 +62,11 @@ public:
static const FunctionInfo functions[] = {
{0, &IManagerForApplication::CheckAvailability, "CheckAvailability"},
{1, &IManagerForApplication::GetAccountId, "GetAccountId"},
{2, nullptr, "EnsureIdTokenCacheAsync"},
{3, nullptr, "LoadIdTokenCache"},
{130, nullptr, "GetNintendoAccountUserResourceCacheForApplication"},
{150, nullptr, "CreateAuthorizationRequest"},
{160, nullptr, "StoreOpenContext"},
};
RegisterHandlers(functions);
}

View File

@@ -236,7 +236,7 @@ public:
: ServiceFramework("IFileSystem"), backend(std::move(backend)) {
static const FunctionInfo functions[] = {
{0, &IFileSystem::CreateFile, "CreateFile"},
{1, nullptr, "DeleteFile"},
{1, &IFileSystem::DeleteFile, "DeleteFile"},
{2, &IFileSystem::CreateDirectory, "CreateDirectory"},
{3, nullptr, "DeleteDirectory"},
{4, nullptr, "DeleteDirectoryRecursively"},
@@ -273,6 +273,20 @@ public:
rb.Push(backend->CreateFile(name, size));
}
/// Handles the DeleteFile command: reads the path from the request buffer, asks the
/// backend to delete it and pushes the backend's result code as the response.
void DeleteFile(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp{ctx};

    // Everything up to the first NUL byte (or the whole buffer, if there is none) is the path
    const auto raw_path = ctx.ReadBuffer();
    const auto terminator = std::find(raw_path.begin(), raw_path.end(), '\0');
    const std::string name(raw_path.begin(), terminator);

    LOG_DEBUG(Service_FS, "called file %s", name.c_str());

    const auto result = backend->DeleteFile(name);

    IPC::ResponseBuilder rb{ctx, 2};
    rb.Push(result);
}
void CreateDirectory(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};

View File

@@ -47,10 +47,10 @@ PL_U::PL_U() : ServiceFramework("pl:u") {
FileUtil::CreateFullPath(filepath); // Create path if not already created
FileUtil::IOFile file(filepath, "rb");
shared_font = std::make_shared<std::vector<u8>>(SHARED_FONT_MEM_SIZE);
if (file.IsOpen()) {
// Read shared font data
ASSERT(file.GetSize() == SHARED_FONT_MEM_SIZE);
shared_font = std::make_shared<std::vector<u8>>(static_cast<size_t>(file.GetSize()));
file.ReadBytes(shared_font->data(), shared_font->size());
} else {
LOG_WARNING(Service_NS, "Unable to load shared font: %s", filepath.c_str());
@@ -97,22 +97,19 @@ void PL_U::GetSharedMemoryAddressOffset(Kernel::HLERequestContext& ctx) {
}
void PL_U::GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx) {
if (shared_font != nullptr) {
// TODO(bunnei): This is a less-than-ideal solution to load a RAM dump of the Switch shared
// font data. This (likely) relies on exact address, size, and offsets from the original
// dump. In the future, we need to replace this with a more robust solution.
// TODO(bunnei): This is a less-than-ideal solution to load a RAM dump of the Switch shared
// font data. This (likely) relies on exact address, size, and offsets from the original
// dump. In the future, we need to replace this with a more robust solution.
// Map backing memory for the font data
Core::CurrentProcess()->vm_manager.MapMemoryBlock(SHARED_FONT_MEM_VADDR, shared_font, 0,
SHARED_FONT_MEM_SIZE,
Kernel::MemoryState::Shared);
// Map backing memory for the font data
Core::CurrentProcess()->vm_manager.MapMemoryBlock(
SHARED_FONT_MEM_VADDR, shared_font, 0, SHARED_FONT_MEM_SIZE, Kernel::MemoryState::Shared);
// Create shared font memory object
shared_font_mem = Kernel::SharedMemory::Create(
Core::CurrentProcess(), SHARED_FONT_MEM_SIZE, Kernel::MemoryPermission::ReadWrite,
Kernel::MemoryPermission::Read, SHARED_FONT_MEM_VADDR, Kernel::MemoryRegion::BASE,
"PL_U:shared_font_mem");
}
// Create shared font memory object
shared_font_mem = Kernel::SharedMemory::Create(
Core::CurrentProcess(), SHARED_FONT_MEM_SIZE, Kernel::MemoryPermission::ReadWrite,
Kernel::MemoryPermission::Read, SHARED_FONT_MEM_VADDR, Kernel::MemoryRegion::BASE,
"PL_U:shared_font_mem");
LOG_DEBUG(Service_NS, "called");
IPC::ResponseBuilder rb{ctx, 2, 1};

View File

@@ -36,6 +36,7 @@ SET::SET() : ServiceFramework("set") {
{5, nullptr, "GetAvailableLanguageCodes2"},
{6, nullptr, "GetAvailableLanguageCodeCount2"},
{7, nullptr, "GetKeyCodeMap"},
{8, nullptr, "GetQuestFlag"},
};
RegisterHandlers(functions);
}

View File

@@ -32,6 +32,15 @@ SET_CAL::SET_CAL() : ServiceFramework("set:cal") {
{21, nullptr, "GetEticketDeviceKey"},
{22, nullptr, "GetSpeakerParameter"},
{23, nullptr, "GetLcdVendorId"},
{24, nullptr, "GetEciDeviceCertificate2"},
{25, nullptr, "GetEciDeviceKey2"},
{26, nullptr, "GetAmiiboKey"},
{27, nullptr, "GetAmiiboEcqvCertificate"},
{28, nullptr, "GetAmiiboEcdsaCertificate"},
{29, nullptr, "GetAmiiboEcqvBlsKey"},
{30, nullptr, "GetAmiiboEcqvBlsCertificate"},
{31, nullptr, "GetAmiiboEcqvBlsRootCertificate"},
{32, nullptr, "GetUnknownId"},
};
RegisterHandlers(functions);
}

View File

@@ -27,6 +27,7 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
{2, nullptr, "GetNetworkSettings"},
{3, nullptr, "GetFirmwareVersion"},
{4, nullptr, "GetFirmwareVersion2"},
{5, nullptr, "GetFirmwareVersionDigest"},
{7, nullptr, "GetLockScreenFlag"},
{8, nullptr, "SetLockScreenFlag"},
{9, nullptr, "GetBacklightSettings"},
@@ -159,6 +160,15 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
{138, nullptr, "GetWebInspectorFlag"},
{139, nullptr, "GetAllowedSslHosts"},
{140, nullptr, "GetHostFsMountPoint"},
{141, nullptr, "GetRequiresRunRepairTimeReviser"},
{142, nullptr, "SetRequiresRunRepairTimeReviser"},
{143, nullptr, "SetBlePairingSettings"},
{144, nullptr, "GetBlePairingSettings"},
{145, nullptr, "GetConsoleSixAxisSensorAngularVelocityTimeBias"},
{146, nullptr, "SetConsoleSixAxisSensorAngularVelocityTimeBias"},
{147, nullptr, "GetConsoleSixAxisSensorAngularAcceleration"},
{148, nullptr, "SetConsoleSixAxisSensorAngularAcceleration"},
{149, nullptr, "GetRebootlessSystemUpdateVersion"},
};
RegisterHandlers(functions);
}

View File

@@ -79,9 +79,34 @@ BSD::BSD(const char* name) : ServiceFramework(name) {
{0, &BSD::RegisterClient, "RegisterClient"},
{1, &BSD::StartMonitoring, "StartMonitoring"},
{2, &BSD::Socket, "Socket"},
{3, nullptr, "SocketExempt"},
{4, nullptr, "Open"},
{5, nullptr, "Select"},
{6, nullptr, "Poll"},
{7, nullptr, "Sysctl"},
{8, nullptr, "Recv"},
{9, nullptr, "RecvFrom"},
{10, nullptr, "Send"},
{11, &BSD::SendTo, "SendTo"},
{12, nullptr, "Accept"},
{13, nullptr, "Bind"},
{14, &BSD::Connect, "Connect"},
{15, nullptr, "GetPeerName"},
{16, nullptr, "GetSockName"},
{17, nullptr, "GetSockOpt"},
{18, nullptr, "Listen"},
{19, nullptr, "Ioctl"},
{20, nullptr, "Fcntl"},
{21, nullptr, "SetSockOpt"},
{22, nullptr, "Shutdown"},
{23, nullptr, "ShutdownAllSockets"},
{24, nullptr, "Write"},
{25, nullptr, "Read"},
{26, &BSD::Close, "Close"},
{27, nullptr, "DuplicateSocket"},
{28, nullptr, "GetResourceStatistics"},
{29, nullptr, "RecvMMsg"},
{30, nullptr, "SendMMsg"},
};
RegisterHandlers(functions);
}

View File

@@ -30,6 +30,7 @@ SFDNSRES::SFDNSRES() : ServiceFramework("sfdnsres") {
{7, nullptr, "GetNameInfo"},
{8, nullptr, "RequestCancelHandle"},
{9, nullptr, "CancelSocketCall"},
{11, nullptr, "ClearDnsIpServerAddressArray"},
};
RegisterHandlers(functions);
}

View File

@@ -33,6 +33,12 @@ SPL::SPL(std::shared_ptr<Module> module) : Module::Interface(std::move(module),
{23, nullptr, "GetSplWaitEvent"},
{24, nullptr, "SetSharedData"},
{25, nullptr, "GetSharedData"},
{26, nullptr, "ImportSslRsaKey"},
{27, nullptr, "SecureExpModWithSslKey"},
{28, nullptr, "ImportEsRsaKey"},
{29, nullptr, "SecureExpModWithEsKey"},
{30, nullptr, "EncryptManuRsaKeyForImport"},
{31, nullptr, "GetPackage2Hash"},
};
RegisterHandlers(functions);
}

View File

@@ -20,7 +20,11 @@ public:
ISystemClock() : ServiceFramework("ISystemClock") {
static const FunctionInfo functions[] = {
{0, &ISystemClock::GetCurrentTime, "GetCurrentTime"},
{2, &ISystemClock::GetSystemClockContext, "GetSystemClockContext"}};
{1, nullptr, "SetCurrentTime"},
{2, &ISystemClock::GetSystemClockContext, "GetSystemClockContext"},
{3, nullptr, "SetSystemClockContext"},
};
RegisterHandlers(functions);
}

View File

@@ -14,6 +14,17 @@ TIME_S::TIME_S(std::shared_ptr<Module> time) : Module::Interface(std::move(time)
{2, &TIME_S::GetStandardSteadyClock, "GetStandardSteadyClock"},
{3, &TIME_S::GetTimeZoneService, "GetTimeZoneService"},
{4, &TIME_S::GetStandardLocalSystemClock, "GetStandardLocalSystemClock"},
{5, nullptr, "GetEphemeralNetworkSystemClock"},
{50, nullptr, "SetStandardSteadyClockInternalOffset"},
{100, nullptr, "IsStandardUserSystemClockAutomaticCorrectionEnabled"},
{101, nullptr, "SetStandardUserSystemClockAutomaticCorrectionEnabled"},
{102, nullptr, "GetStandardUserSystemClockInitialYear"},
{200, nullptr, "IsStandardNetworkSystemClockAccuracySufficient"},
{300, nullptr, "CalculateMonotonicSystemClockBaseTimePoint"},
{400, nullptr, "GetClockSnapshot"},
{401, nullptr, "GetClockSnapshotFromSystemClockContext"},
{500, nullptr, "CalculateStandardUserSystemClockDifferenceByUser"},
{501, nullptr, "CalculateSpanBetween"},
};
RegisterHandlers(functions);
}

View File

@@ -14,6 +14,17 @@ TIME_U::TIME_U(std::shared_ptr<Module> time) : Module::Interface(std::move(time)
{2, &TIME_U::GetStandardSteadyClock, "GetStandardSteadyClock"},
{3, &TIME_U::GetTimeZoneService, "GetTimeZoneService"},
{4, &TIME_U::GetStandardLocalSystemClock, "GetStandardLocalSystemClock"},
{5, nullptr, "GetEphemeralNetworkSystemClock"},
{50, nullptr, "SetStandardSteadyClockInternalOffset"},
{100, nullptr, "IsStandardUserSystemClockAutomaticCorrectionEnabled"},
{101, nullptr, "SetStandardUserSystemClockAutomaticCorrectionEnabled"},
{102, nullptr, "GetStandardUserSystemClockInitialYear"},
{200, nullptr, "IsStandardNetworkSystemClockAccuracySufficient"},
{300, nullptr, "CalculateMonotonicSystemClockBaseTimePoint"},
{400, nullptr, "GetClockSnapshot"},
{401, nullptr, "GetClockSnapshotFromSystemClockContext"},
{500, nullptr, "CalculateStandardUserSystemClockDifferenceByUser"},
{501, nullptr, "CalculateSpanBetween"},
};
RegisterHandlers(functions);
}

View File

@@ -580,7 +580,48 @@ public:
ISystemDisplayService() : ServiceFramework("ISystemDisplayService") {
static const FunctionInfo functions[] = {
{1200, nullptr, "GetZOrderCountMin"},
{1202, nullptr, "GetZOrderCountMax"},
{1203, nullptr, "GetDisplayLogicalResolution"},
{1204, nullptr, "SetDisplayMagnification"},
{2201, nullptr, "SetLayerPosition"},
{2203, nullptr, "SetLayerSize"},
{2204, nullptr, "GetLayerZ"},
{2205, &ISystemDisplayService::SetLayerZ, "SetLayerZ"},
{2207, nullptr, "SetLayerVisibility"},
{2209, nullptr, "SetLayerAlpha"},
{2312, nullptr, "CreateStrayLayer"},
{2400, nullptr, "OpenIndirectLayer"},
{2401, nullptr, "CloseIndirectLayer"},
{2402, nullptr, "FlipIndirectLayer"},
{3000, nullptr, "ListDisplayModes"},
{3001, nullptr, "ListDisplayRgbRanges"},
{3002, nullptr, "ListDisplayContentTypes"},
{3200, nullptr, "GetDisplayMode"},
{3201, nullptr, "SetDisplayMode"},
{3202, nullptr, "GetDisplayUnderscan"},
{3203, nullptr, "SetDisplayUnderscan"},
{3204, nullptr, "GetDisplayContentType"},
{3205, nullptr, "SetDisplayContentType"},
{3206, nullptr, "GetDisplayRgbRange"},
{3207, nullptr, "SetDisplayRgbRange"},
{3208, nullptr, "GetDisplayCmuMode"},
{3209, nullptr, "SetDisplayCmuMode"},
{3210, nullptr, "GetDisplayContrastRatio"},
{3211, nullptr, "SetDisplayContrastRatio"},
{3214, nullptr, "GetDisplayGamma"},
{3215, nullptr, "SetDisplayGamma"},
{3216, nullptr, "GetDisplayCmuLuma"},
{3217, nullptr, "SetDisplayCmuLuma"},
{8225, nullptr, "GetSharedBufferMemoryHandleId"},
{8250, nullptr, "OpenSharedLayer"},
{8251, nullptr, "CloseSharedLayer"},
{8252, nullptr, "ConnectSharedLayer"},
{8253, nullptr, "DisconnectSharedLayer"},
{8254, nullptr, "AcquireSharedFrameBuffer"},
{8255, nullptr, "PresentSharedFrameBuffer"},
{8256, nullptr, "GetSharedFrameBufferAcquirableEvent"},
{8257, nullptr, "FillSharedFrameBufferColor"},
{8258, nullptr, "CancelSharedFrameBuffer"},
};
RegisterHandlers(functions);
}
@@ -603,10 +644,72 @@ public:
explicit IManagerDisplayService(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
: ServiceFramework("IManagerDisplayService"), nv_flinger(std::move(nv_flinger)) {
static const FunctionInfo functions[] = {
{200, nullptr, "AllocateProcessHeapBlock"},
{201, nullptr, "FreeProcessHeapBlock"},
{1020, &IManagerDisplayService::CloseDisplay, "CloseDisplay"},
{1102, nullptr, "GetDisplayResolution"},
{2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
{2011, nullptr, "DestroyManagedLayer"},
{2050, nullptr, "CreateIndirectLayer"},
{2051, nullptr, "DestroyIndirectLayer"},
{2052, nullptr, "CreateIndirectProducerEndPoint"},
{2053, nullptr, "DestroyIndirectProducerEndPoint"},
{2054, nullptr, "CreateIndirectConsumerEndPoint"},
{2055, nullptr, "DestroyIndirectConsumerEndPoint"},
{2300, nullptr, "AcquireLayerTexturePresentingEvent"},
{2301, nullptr, "ReleaseLayerTexturePresentingEvent"},
{2302, nullptr, "GetDisplayHotplugEvent"},
{2402, nullptr, "GetDisplayHotplugState"},
{2501, nullptr, "GetCompositorErrorInfo"},
{2601, nullptr, "GetDisplayErrorEvent"},
{4201, nullptr, "SetDisplayAlpha"},
{4203, nullptr, "SetDisplayLayerStack"},
{4205, nullptr, "SetDisplayPowerState"},
{4206, nullptr, "SetDefaultDisplay"},
{6000, &IManagerDisplayService::AddToLayerStack, "AddToLayerStack"},
{6001, nullptr, "RemoveFromLayerStack"},
{6002, nullptr, "SetLayerVisibility"},
{6003, nullptr, "SetLayerConfig"},
{6004, nullptr, "AttachLayerPresentationTracer"},
{6005, nullptr, "DetachLayerPresentationTracer"},
{6006, nullptr, "StartLayerPresentationRecording"},
{6007, nullptr, "StopLayerPresentationRecording"},
{6008, nullptr, "StartLayerPresentationFenceWait"},
{6009, nullptr, "StopLayerPresentationFenceWait"},
{6010, nullptr, "GetLayerPresentationAllFencesExpiredEvent"},
{7000, nullptr, "SetContentVisibility"},
{8000, nullptr, "SetConductorLayer"},
{8100, nullptr, "SetIndirectProducerFlipOffset"},
{8200, nullptr, "CreateSharedBufferStaticStorage"},
{8201, nullptr, "CreateSharedBufferTransferMemory"},
{8202, nullptr, "DestroySharedBuffer"},
{8203, nullptr, "BindSharedLowLevelLayerToManagedLayer"},
{8204, nullptr, "BindSharedLowLevelLayerToIndirectLayer"},
{8207, nullptr, "UnbindSharedLowLevelLayer"},
{8208, nullptr, "ConnectSharedLowLevelLayerToSharedBuffer"},
{8209, nullptr, "DisconnectSharedLowLevelLayerFromSharedBuffer"},
{8210, nullptr, "CreateSharedLayer"},
{8211, nullptr, "DestroySharedLayer"},
{8216, nullptr, "AttachSharedLayerToLowLevelLayer"},
{8217, nullptr, "ForceDetachSharedLayerFromLowLevelLayer"},
{8218, nullptr, "StartDetachSharedLayerFromLowLevelLayer"},
{8219, nullptr, "FinishDetachSharedLayerFromLowLevelLayer"},
{8220, nullptr, "GetSharedLayerDetachReadyEvent"},
{8221, nullptr, "GetSharedLowLevelLayerSynchronizedEvent"},
{8222, nullptr, "CheckSharedLowLevelLayerSynchronized"},
{8223, nullptr, "RegisterSharedBufferImporterAruid"},
{8224, nullptr, "UnregisterSharedBufferImporterAruid"},
{8227, nullptr, "CreateSharedBufferProcessHeap"},
{8228, nullptr, "GetSharedLayerLayerStacks"},
{8229, nullptr, "SetSharedLayerLayerStacks"},
{8291, nullptr, "PresentDetachedSharedFrameBufferToLowLevelLayer"},
{8292, nullptr, "FillDetachedSharedFrameBufferColor"},
{8293, nullptr, "GetDetachedSharedFrameBufferImage"},
{8294, nullptr, "SetDetachedSharedFrameBufferImage"},
{8295, nullptr, "CopyDetachedSharedFrameBufferImage"},
{8296, nullptr, "SetDetachedSharedFrameBufferSubImage"},
{8297, nullptr, "GetSharedFrameBufferContentParameter"},
{8298, nullptr, "ExpandStartupLogoOnSharedFrameBuffer"},
};
RegisterHandlers(functions);
}
@@ -825,13 +928,21 @@ IApplicationDisplayService::IApplicationDisplayService(
"GetIndirectDisplayTransactionService"},
{1000, &IApplicationDisplayService::ListDisplays, "ListDisplays"},
{1010, &IApplicationDisplayService::OpenDisplay, "OpenDisplay"},
{1011, nullptr, "OpenDefaultDisplay"},
{1020, &IApplicationDisplayService::CloseDisplay, "CloseDisplay"},
{1101, nullptr, "SetDisplayEnabled"},
{1102, &IApplicationDisplayService::GetDisplayResolution, "GetDisplayResolution"},
{2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"},
{2020, &IApplicationDisplayService::OpenLayer, "OpenLayer"},
{2021, nullptr, "CloseLayer"},
{2030, &IApplicationDisplayService::CreateStrayLayer, "CreateStrayLayer"},
{2031, &IApplicationDisplayService::DestroyStrayLayer, "DestroyStrayLayer"},
{2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"},
{2102, nullptr, "ConvertScalingMode"},
{2450, nullptr, "GetIndirectLayerImageMap"},
{2451, nullptr, "GetIndirectLayerImageCropMap"},
{2460, nullptr, "GetIndirectLayerImageRequiredMemoryInfo"},
{5202, &IApplicationDisplayService::GetDisplayVsyncEvent, "GetDisplayVsyncEvent"},
{5203, nullptr, "GetDisplayVsyncEventForDebug"},
};
RegisterHandlers(functions);
}

View File

@@ -11,7 +11,6 @@ VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>
: Module::Interface(std::move(module), "vi:u", std::move(nv_flinger)) {
static const FunctionInfo functions[] = {
{0, &VI_U::GetDisplayService, "GetDisplayService"},
{3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
};
RegisterHandlers(functions);
}

View File

@@ -74,8 +74,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
regs.reg_array[method] = value;
#define MAXWELL3D_REG_INDEX(field_name) (offsetof(Regs, field_name) / sizeof(u32))
switch (method) {
case MAXWELL3D_REG_INDEX(code_address.code_address_high):
case MAXWELL3D_REG_INDEX(code_address.code_address_low): {
@@ -136,7 +134,7 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
break;
}
#undef MAXWELL3D_REG_INDEX
VideoCore::g_renderer->Rasterizer()->NotifyMaxwellRegisterChanged(method);
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
@@ -165,6 +163,7 @@ void Maxwell3D::ProcessQueryGet() {
void Maxwell3D::DrawArrays() {
LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(),
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
@@ -176,7 +175,8 @@ void Maxwell3D::DrawArrays() {
debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
}
VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(false /*is_indexed*/);
const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(is_indexed);
}
void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
@@ -218,10 +218,12 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry;
Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear,
"TIC versions other than BlockLinear are unimplemented");
ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
"TIC versions other than BlockLinear or Pitch are unimplemented");
ASSERT_MSG(tic_entry.texture_type == Texture::TextureType::Texture2D,
ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) ||
(tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap),
"Texture types other than Texture2D are unimplemented");
auto r_type = tic_entry.r_type.Value();

View File

@@ -20,6 +20,9 @@
namespace Tegra {
namespace Engines {
/// Converts the name of a field of Maxwell3D::Regs into a register index: the field's byte
/// offset inside Regs divided by the size of one u32 register word.
#define MAXWELL3D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
class Maxwell3D final {
public:
explicit Maxwell3D(MemoryManager& memory_manager);
@@ -248,6 +251,52 @@ public:
Patches = 0xe,
};
/// Element type of the indices used by indexed draws (stored in index_array.format).
/// index_array.FormatSizeInBytes() maps these to 1, 2 and 4 bytes respectively.
enum class IndexFormat : u32 {
UnsignedByte = 0x0,
UnsignedShort = 0x1,
UnsignedInt = 0x2,
};
/// Blend configuration registers: seven consecutive 32-bit values (one flag word followed by
/// an equation/source-factor/destination-factor triple for RGB and another for alpha).
/// The field order is the register order, so it must not be rearranged.
struct Blend {
// Blend equation selector; the OpenGL rasterizer converts it with MaxwellToGL::BlendEquation.
enum class Equation : u32 {
Add = 1,
Subtract = 2,
ReverseSubtract = 3,
Min = 4,
Max = 5,
};
// Blend factor selector; the OpenGL rasterizer converts it with MaxwellToGL::BlendFunc.
// The numbering is not contiguous (0x1-0xb, 0x10-0x13, 0x61-0x64).
enum class Factor : u32 {
Zero = 0x1,
One = 0x2,
SourceColor = 0x3,
OneMinusSourceColor = 0x4,
SourceAlpha = 0x5,
OneMinusSourceAlpha = 0x6,
DestAlpha = 0x7,
OneMinusDestAlpha = 0x8,
DestColor = 0x9,
OneMinusDestColor = 0xa,
SourceAlphaSaturate = 0xb,
Source1Color = 0x10,
OneMinusSource1Color = 0x11,
Source1Alpha = 0x12,
OneMinusSource1Alpha = 0x13,
ConstantColor = 0x61,
OneMinusConstantColor = 0x62,
ConstantAlpha = 0x63,
OneMinusConstantAlpha = 0x64,
};
// NOTE(review): presumably enables independent alpha blending; the OpenGL rasterizer
// currently asserts "unimplemented" whenever this register is written.
u32 separate_alpha;
// Equation and factors applied to the RGB channels.
Equation equation_rgb;
Factor factor_source_rgb;
Factor factor_dest_rgb;
// Equation and factors applied to the alpha channel.
Equation equation_a;
Factor factor_source_a;
Factor factor_dest_a;
};
union {
struct {
INSERT_PADDING_WORDS(0x200);
@@ -270,7 +319,15 @@ public:
}
} rt[NumRenderTargets];
INSERT_PADDING_WORDS(0x80);
// Per-viewport transform registers: a scale triple, a translate triple and two padding
// words, repeated once for each of the NumViewports viewports.
struct {
f32 scale_x;
f32 scale_y;
f32 scale_z;
// NOTE(review): the translate values are declared as raw u32 while the scale values are
// f32 - confirm whether they should be read as floats as well.
u32 translate_x;
u32 translate_y;
u32 translate_z;
INSERT_PADDING_WORDS(2);
} viewport_transform[NumViewports];
struct {
union {
@@ -375,7 +432,42 @@ public:
};
} draw;
INSERT_PADDING_WORDS(0x139);
INSERT_PADDING_WORDS(0x6B);
// Index buffer registers used by indexed draws. The data members mirror the register
// layout, so their order must be preserved; the member functions only decode them.
struct {
    u32 start_addr_high;
    u32 start_addr_low;
    u32 end_addr_high;
    u32 end_addr_low;
    IndexFormat format;
    u32 first;
    u32 count;

    /// Returns the size in bytes of a single index of the configured format.
    unsigned FormatSizeInBytes() const {
        if (format == IndexFormat::UnsignedByte) {
            return 1;
        }
        if (format == IndexFormat::UnsignedShort) {
            return 2;
        }
        if (format == IndexFormat::UnsignedInt) {
            return 4;
        }

        // Any other value is not a known index format.
        UNREACHABLE();
    }

    /// Returns the GPU virtual address of the first byte of the index buffer, assembled
    /// from the high and low 32-bit halves.
    GPUVAddr StartAddress() const {
        const GPUVAddr upper_half = static_cast<GPUVAddr>(start_addr_high) << 32;
        return static_cast<GPUVAddr>(upper_half | start_addr_low);
    }

    /// Returns the GPU virtual address stored in the end_addr_high/end_addr_low pair.
    GPUVAddr EndAddress() const {
        const GPUVAddr upper_half = static_cast<GPUVAddr>(end_addr_high) << 32;
        return static_cast<GPUVAddr>(upper_half | end_addr_low);
    }
} index_array;
INSERT_PADDING_WORDS(0xC7);
struct {
u32 query_address_high;
u32 query_address_low;
@@ -410,7 +502,9 @@ public:
}
} vertex_array[NumVertexArrays];
INSERT_PADDING_WORDS(0x40);
Blend blend;
INSERT_PADDING_WORDS(0x39);
struct {
u32 limit_high;
@@ -563,6 +657,7 @@ private:
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform[0], 0x280);
ASSERT_REG_POSITION(viewport, 0x300);
ASSERT_REG_POSITION(vertex_buffer, 0x35D);
ASSERT_REG_POSITION(zeta, 0x3F8);
@@ -572,8 +667,10 @@ ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(blend, 0x780);
ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
ASSERT_REG_POSITION(shader_config[0], 0x800);
ASSERT_REG_POSITION(const_buffer, 0x8E0);

View File

@@ -4,6 +4,7 @@
#pragma once
#include <cstring>
#include <map>
#include <string>
#include "common/bit_field.h"
@@ -12,14 +13,10 @@ namespace Tegra {
namespace Shader {
struct Register {
Register() = default;
constexpr Register() = default;
constexpr Register(u64 value) : value(value) {}
constexpr u64 GetIndex() const {
return value;
}
constexpr operator u64() const {
return value;
}
@@ -43,13 +40,13 @@ struct Register {
}
private:
u64 value;
u64 value{};
};
union Attribute {
Attribute() = default;
constexpr Attribute(u64 value) : value(value) {}
constexpr explicit Attribute(u64 value) : value(value) {}
enum class Index : u64 {
Position = 7,
@@ -68,7 +65,20 @@ union Attribute {
} fmt28;
BitField<39, 8, u64> reg;
u64 value;
u64 value{};
};
// 64-bit view used to read the sampler reference of a shader instruction (it is one of the
// members of the Instruction union).
union Sampler {
Sampler() = default;
// Explicit, so a raw u64 is never converted to a Sampler implicitly.
constexpr explicit Sampler(u64 value) : value(value) {}
// Known sampler indices; only the first sampler is named so far.
enum class Index : u64 {
Sampler_0 = 8,
};
// NOTE(review): presumably BitField<position, bits, type>, i.e. a 13-bit field starting at
// bit 36 - confirm against common/bit_field.h.
BitField<36, 13, Index> index;
// Raw 64-bit value, zero-initialized by default.
u64 value{};
};
union Uniform {
@@ -238,7 +248,7 @@ union OpCode {
BitField<55, 9, Id> op3;
BitField<52, 12, Id> op4;
BitField<51, 13, Id> op5;
u64 value;
u64 value{};
};
static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size");
@@ -280,6 +290,7 @@ enum class SubOp : u64 {
Lg2 = 0x3,
Rcp = 0x4,
Rsq = 0x5,
Min = 0x8,
};
union Instruction {
@@ -295,15 +306,25 @@ union Instruction {
BitField<20, 8, Register> gpr20;
BitField<20, 7, SubOp> sub_op;
BitField<28, 8, Register> gpr28;
BitField<36, 13, u64> imm36;
BitField<39, 8, Register> gpr39;
union {
BitField<20, 19, u64> imm20;
BitField<45, 1, u64> negate_b;
BitField<46, 1, u64> abs_a;
BitField<48, 1, u64> negate_a;
BitField<49, 1, u64> abs_b;
BitField<50, 1, u64> abs_d;
BitField<56, 1, u64> negate_imm;
float GetImm20() const {
float result{};
u32 imm{static_cast<u32>(imm20)};
imm <<= 12;
imm |= negate_imm ? 0x80000000 : 0;
std::memcpy(&result, &imm, sizeof(imm));
return result;
}
} alu;
union {
@@ -311,11 +332,13 @@ union Instruction {
BitField<49, 1, u64> negate_c;
} ffma;
BitField<61, 1, u64> is_b_imm;
BitField<60, 1, u64> is_b_gpr;
BitField<59, 1, u64> is_c_gpr;
Attribute attribute;
Uniform uniform;
Sampler sampler;
u64 hex;
};

View File

@@ -15,7 +15,10 @@ namespace Tegra {
/// Formats a render target can be configured with.
/// NOTE(review): the numeric values presumably mirror the hardware encoding - confirm.
/// Of these, SurfaceParams::ComponentTypeFromRenderTarget currently accepts only
/// RGBA8_UNORM and RGB10_A2_UNORM.
enum class RenderTargetFormat : u32 {
NONE = 0x0,
RGBA16_FLOAT = 0xCA,
RGB10_A2_UNORM = 0xD1,
RGBA8_UNORM = 0xD5,
RGBA8_SRGB = 0xD6,
};
class DebugContext;

View File

@@ -19,7 +19,7 @@ public:
virtual void DrawArrays() = 0;
/// Notify rasterizer that the specified Maxwell register has been changed
virtual void NotifyMaxwellRegisterChanged(u32 id) = 0;
virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;
/// Notify rasterizer that all caches should be flushed to Switch memory
virtual void FlushAll() = 0;

View File

@@ -97,7 +97,6 @@ RasterizerOpenGL::RasterizerOpenGL() {
state.draw.vertex_buffer = stream_buffer->GetHandle();
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.draw.vertex_array = hw_vao.handle;
state.Apply();
@@ -128,17 +127,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
}
}
/// Computes vs_input_size, the number of bytes of vertex data needed for the current draw,
/// from the vertex count and the stride of vertex array 0. Indexed draws are not supported
/// by this function.
void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) {
    const auto& maxwell_regs = Core::System().GetInstance().GPU().Maxwell3D().regs;

    if (is_indexed) {
        UNREACHABLE();
    }

    // TODO(bunnei): Add support for 1+ vertex arrays
    const auto& first_array = maxwell_regs.vertex_array[0];
    vs_input_size = maxwell_regs.vertex_buffer.count * first_array.stride;
}
void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
MICROPROFILE_SCOPE(OpenGL_VAO);
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
@@ -150,6 +138,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
// TODO(bunnei): Add support for 1+ vertex arrays
const auto& vertex_array{regs.vertex_array[0]};
const auto& vertex_array_limit{regs.vertex_array_limit[0]};
ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?");
ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!");
for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) {
@@ -162,6 +151,10 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
// to avoid OpenGL errors.
for (unsigned index = 0; index < 16; ++index) {
auto& attrib = regs.vertex_attrib_format[index];
NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
attrib.offset.Value(), attrib.IsNormalized());
glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride,
reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset));
@@ -170,7 +163,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
}
// Copy vertex array data
const u32 data_size{vertex_array.stride * regs.vertex_buffer.count};
const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1};
const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())};
res_cache.FlushRegion(data_addr, data_size, nullptr);
Memory::ReadBlock(data_addr, array_ptr, data_size);
@@ -333,13 +326,18 @@ void RasterizerOpenGL::DrawArrays() {
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
AnalyzeVertexArray(is_indexed);
const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count};
// TODO(bunnei): Add support for 1+ vertex arrays
vs_input_size = vertex_num * regs.vertex_array[0].stride;
state.draw.vertex_buffer = stream_buffer->GetHandle();
state.Apply();
size_t buffer_size = static_cast<size_t>(vs_input_size);
if (is_indexed) {
UNREACHABLE();
buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size;
}
// Uniform space for the 5 shader stages
@@ -354,9 +352,18 @@ void RasterizerOpenGL::DrawArrays() {
SetupVertexArray(buffer_ptr, buffer_offset);
ptr_pos += vs_input_size;
// If indexed mode, copy the index buffer
GLintptr index_buffer_offset = 0;
if (is_indexed) {
UNREACHABLE();
ptr_pos = Common::AlignUp(ptr_pos, 4);
const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager;
const VAddr index_data_addr{
memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())};
Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size);
index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
ptr_pos += index_buffer_size;
}
SetupShaders(buffer_ptr, buffer_offset, ptr_pos);
@@ -366,11 +373,16 @@ void RasterizerOpenGL::DrawArrays() {
shader_program_manager->ApplyTo(state);
state.Apply();
const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)};
if (is_indexed) {
UNREACHABLE();
const GLint index_min{static_cast<GLint>(regs.index_array.first)};
const GLint index_max{static_cast<GLint>(regs.index_array.first + regs.index_array.count)};
glDrawRangeElementsBaseVertex(primitive_mode, index_min, index_max, regs.index_array.count,
MaxwellToGL::IndexFormat(regs.index_array.format),
reinterpret_cast<const void*>(index_buffer_offset),
-index_min);
} else {
glDrawArrays(MaxwellToGL::PrimitiveTopology(regs.draw.topology), 0,
regs.vertex_buffer.count);
glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count);
}
// Disable scissor test
@@ -434,7 +446,32 @@ void RasterizerOpenGL::BindTextures() {
}
}
void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {}
/// Called after a Maxwell register has been written. When `method` is one of the blend
/// registers, the matching field of `state.blend` is refreshed from the current register
/// value; writes to any other register are ignored here.
void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {
    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
    const auto& blend_regs = regs.blend;
    auto& gl_blend = state.blend;

    switch (method) {
    case MAXWELL3D_REG_INDEX(blend.separate_alpha):
        // Separate alpha blending is not handled yet.
        ASSERT_MSG(false, "unimplemented");
        break;

    // RGB equation and factors
    case MAXWELL3D_REG_INDEX(blend.equation_rgb):
        gl_blend.rgb_equation = MaxwellToGL::BlendEquation(blend_regs.equation_rgb);
        break;
    case MAXWELL3D_REG_INDEX(blend.factor_source_rgb):
        gl_blend.src_rgb_func = MaxwellToGL::BlendFunc(blend_regs.factor_source_rgb);
        break;
    case MAXWELL3D_REG_INDEX(blend.factor_dest_rgb):
        gl_blend.dst_rgb_func = MaxwellToGL::BlendFunc(blend_regs.factor_dest_rgb);
        break;

    // Alpha equation and factors
    case MAXWELL3D_REG_INDEX(blend.equation_a):
        gl_blend.a_equation = MaxwellToGL::BlendEquation(blend_regs.equation_a);
        break;
    case MAXWELL3D_REG_INDEX(blend.factor_source_a):
        gl_blend.src_a_func = MaxwellToGL::BlendFunc(blend_regs.factor_source_a);
        break;
    case MAXWELL3D_REG_INDEX(blend.factor_dest_a):
        gl_blend.dst_a_func = MaxwellToGL::BlendFunc(blend_regs.factor_dest_a);
        break;

    default:
        // Not a register this rasterizer tracks.
        break;
    }
}
void RasterizerOpenGL::FlushAll() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
@@ -486,9 +523,12 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu
src_params.width = std::min(framebuffer.width, pixel_stride);
src_params.height = framebuffer.height;
src_params.stride = pixel_stride;
src_params.is_tiled = false;
src_params.is_tiled = true;
src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
src_params.pixel_format =
SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
src_params.component_type =
SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format);
src_params.UpdateParams();
MathUtil::Rectangle<u32> src_rect;

View File

@@ -32,7 +32,7 @@ public:
~RasterizerOpenGL() override;
void DrawArrays() override;
void NotifyMaxwellRegisterChanged(u32 id) override;
void NotifyMaxwellRegisterChanged(u32 method) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
@@ -155,7 +155,6 @@ private:
GLsizeiptr vs_input_size;
void AnalyzeVertexArray(bool is_indexed);
void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset);
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers;

View File

@@ -36,6 +36,7 @@
using SurfaceType = SurfaceParams::SurfaceType;
using PixelFormat = SurfaceParams::PixelFormat;
using ComponentType = SurfaceParams::ComponentType;
struct FormatTuple {
GLint internal_format;
@@ -47,26 +48,23 @@ struct FormatTuple {
u32 compression_factor;
};
static constexpr std::array<FormatTuple, 1> fb_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8
}};
static constexpr std::array<FormatTuple, 2> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8
// OpenGL format description for every supported pixel format. GetFormatTuple indexes this
// table with the numeric value of PixelFormat, so the entries must stay in the same order as
// the PixelFormat enumerators (ABGR8, B5G6R5, DXT1, RGB10_A2).
static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false, 1}, // ABGR8
{GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false, 1}, // B5G6R5
{GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_10_10_10_2, false, 1}, // RGB10_A2
}};
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
if (type == SurfaceType::Color) {
ASSERT(static_cast<size_t>(pixel_format) < fb_format_tuples.size());
return fb_format_tuples[static_cast<unsigned int>(pixel_format)];
if (type == SurfaceType::ColorTexture) {
ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
// For now only UNORM components are supported
ASSERT(component_type == ComponentType::UNorm);
return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
} else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
// TODO(Subv): Implement depth formats
ASSERT_MSG(false, "Unimplemented");
} else if (type == SurfaceType::Texture) {
ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
}
UNREACHABLE();
@@ -102,39 +100,42 @@ static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) {
}
template <bool morton_to_gl, PixelFormat format>
void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) {
void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start,
VAddr end) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
// configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
Memory::GetPointer(base), gl_buffer, morton_to_gl);
if (morton_to_gl) {
auto data = Tegra::Texture::UnswizzleTexture(
base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height,
block_height);
std::memcpy(gl_buffer, data.data(), data.size());
} else {
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
// the configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
Memory::GetPointer(base), gl_buffer, morton_to_gl);
}
}
template <>
void MortonCopy<true, PixelFormat::DXT1>(u32 stride, u32 height, u8* gl_buffer, VAddr base,
VAddr start, VAddr end) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::DXT1) / 8;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::DXT1);
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
// configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
auto data =
Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::DXT1, stride, height);
std::memcpy(gl_buffer, data.data(), data.size());
}
static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> morton_to_gl_fns = {
MortonCopy<true, PixelFormat::RGBA8>,
MortonCopy<true, PixelFormat::DXT1>,
// Guest memory -> GL buffer copy routines, one per pixel format. The table is indexed with the
// numeric value of PixelFormat, so the entries must follow the order of its enumerators.
// Arguments are (stride, block_height, height, gl_buffer, base, start, end).
static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
MortonCopy<true, PixelFormat::ABGR8>,
MortonCopy<true, PixelFormat::B5G6R5>,
MortonCopy<true, PixelFormat::DXT1>,
MortonCopy<true, PixelFormat::RGB10_A2>,
};
static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> gl_to_morton_fns = {
MortonCopy<false, PixelFormat::RGBA8>,
MortonCopy<false, PixelFormat::DXT1>,
// GL buffer -> guest memory copy routines, one per pixel format. The table is indexed with the
// numeric value of PixelFormat, so the entries must follow the order of its enumerators.
// Arguments are (stride, block_height, height, gl_buffer, base, start, end).
// NOTE(review): the DXT1 slot is null and the call in CachedSurface::FlushGLBuffer invokes
// the selected entry without a null check - confirm tiled DXT1 surfaces are never flushed.
static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
SurfaceParams::MaxPixelFormat>
gl_to_morton_fns = {
MortonCopy<false, PixelFormat::ABGR8>,
MortonCopy<false, PixelFormat::B5G6R5>,
// TODO(Subv): Swizzling the DXT1 format is not yet supported
nullptr,
MortonCopy<false, PixelFormat::RGB10_A2>,
};
// Allocate an uninitialized texture of appropriate size and format for the surface
@@ -183,7 +184,7 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec
u32 buffers = 0;
if (type == SurfaceType::Color || type == SurfaceType::Texture) {
if (type == SurfaceType::ColorTexture) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
@@ -311,15 +312,18 @@ MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& su
bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const {
return std::tie(other_surface.addr, other_surface.width, other_surface.height,
other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) ==
std::tie(addr, width, height, stride, pixel_format, is_tiled) &&
other_surface.stride, other_surface.block_height, other_surface.pixel_format,
other_surface.component_type,
other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height,
pixel_format, component_type, is_tiled) &&
pixel_format != PixelFormat::Invalid;
}
bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
return sub_surface.addr >= addr && sub_surface.end <= end &&
sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid &&
sub_surface.is_tiled == is_tiled &&
sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height &&
sub_surface.component_type == component_type &&
(sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
(sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) &&
GetSubRect(sub_surface).left + sub_surface.width <= stride;
@@ -328,7 +332,8 @@ bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
addr <= expanded_surface.end && expanded_surface.addr <= end &&
is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride &&
is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height &&
component_type == expanded_surface.component_type && stride == expanded_surface.stride &&
(std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) %
BytesInPixels(stride * (is_tiled ? 8 : 1)) ==
0;
@@ -339,6 +344,10 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
end < texcopy_params.end) {
return false;
}
if (texcopy_params.block_height != block_height ||
texcopy_params.component_type != component_type)
return false;
if (texcopy_params.width != texcopy_params.stride) {
const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1)));
return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
@@ -481,18 +490,13 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) {
const u64 start_offset = load_start - addr;
if (!is_tiled) {
ASSERT(type == SurfaceType::Color);
const u32 bytes_per_pixel{GetFormatBpp() >> 3};
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
// the configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4,
texture_src_data + start_offset, &gl_buffer[start_offset],
true);
std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
bytes_per_pixel * width * height);
} else {
morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
load_start, load_end);
morton_to_gl_fns[static_cast<size_t>(pixel_format)](
stride, block_height, height, &gl_buffer[0], addr, load_start, load_end);
}
}
@@ -533,11 +537,10 @@ void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) {
if (backup_bytes)
std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
} else if (!is_tiled) {
ASSERT(type == SurfaceType::Color);
std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start);
} else {
gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
flush_start, flush_end);
gl_to_morton_fns[static_cast<size_t>(pixel_format)](
stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end);
}
}
@@ -556,7 +559,7 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
GLint y0 = static_cast<GLint>(rect.bottom);
size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format);
const FormatTuple& tuple = GetFormatTuple(pixel_format);
const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type);
GLuint target_tex = texture.handle;
// If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
@@ -629,7 +632,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui
OpenGLState prev_state = state;
SCOPE_EXIT({ prev_state.Apply(); });
const FormatTuple& tuple = GetFormatTuple(pixel_format);
const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type);
// Ensure no bad interactions with GL_PACK_ALIGNMENT
ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
@@ -662,7 +665,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui
state.draw.read_framebuffer = read_fb_handle;
state.Apply();
if (type == SurfaceType::Color || type == SurfaceType::Texture) {
if (type == SurfaceType::ColorTexture) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
texture.handle, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
@@ -1041,9 +1044,25 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
params.height = config.tic.Height();
params.is_tiled = config.tic.IsTiled();
params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
// TODO(Subv): Different types per component are not supported.
ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() &&
config.tic.r_type.Value() == config.tic.b_type.Value() &&
config.tic.r_type.Value() == config.tic.a_type.Value());
params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value());
if (config.tic.IsTiled()) {
params.block_height = config.tic.BlockHeight();
} else {
// Use the texture-provided stride value if the texture isn't tiled.
params.stride = params.PixelsInBytes(config.tic.Pitch());
}
params.UpdateParams();
if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0) {
if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 ||
params.stride != params.width) {
Surface src_surface;
MathUtil::Rectangle<u32> rect;
std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true);
@@ -1094,10 +1113,13 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
color_params.res_scale = resolution_scale_factor;
color_params.width = config.width;
color_params.height = config.height;
// TODO(Subv): Can framebuffers use a different block height?
color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
SurfaceParams depth_params = color_params;
color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address());
color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format);
color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format);
color_params.UpdateParams();
ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented");
@@ -1293,7 +1315,6 @@ void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface
const SurfaceInterval invalid_interval(addr, addr + size);
if (region_owner != nullptr) {
ASSERT(region_owner->type != SurfaceType::Texture);
ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end);
// Surfaces can't have a gap
ASSERT(region_owner->width == region_owner->stride);
@@ -1355,7 +1376,8 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) {
surface->gl_buffer_size = 0;
surface->invalid_regions.insert(surface->GetInterval());
AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format),
AllocateSurfaceTexture(surface->texture.handle,
GetFormatTuple(surface->pixel_format, surface->component_type),
surface->GetScaledWidth(), surface->GetScaledHeight());
return surface;

View File

@@ -52,27 +52,43 @@ enum class ScaleMatch {
struct SurfaceParams {
enum class PixelFormat {
RGBA8 = 0,
DXT1 = 1,
ABGR8 = 0,
B5G6R5 = 1,
DXT1 = 2,
RGB10_A2 = 3,
Max,
Invalid = 255,
};
static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max);
// How the components of a surface are interpreted. It is stored in SurfaceParams next to the
// pixel format; GetFormatTuple currently asserts that the type is UNorm.
enum class ComponentType {
Invalid = 0,
SNorm = 1,
UNorm = 2,
SInt = 3,
UInt = 4,
Float = 5,
};
enum class SurfaceType {
Color = 0,
Texture = 1,
Depth = 2,
DepthStencil = 3,
Fill = 4,
Invalid = 5
ColorTexture = 0,
Depth = 1,
DepthStencil = 2,
Fill = 3,
Invalid = 4,
};
static constexpr unsigned int GetFormatBpp(PixelFormat format) {
if (format == PixelFormat::Invalid)
return 0;
constexpr std::array<unsigned int, 2> bpp_table = {
32, // RGBA8
constexpr std::array<unsigned int, MaxPixelFormat> bpp_table = {
32, // ABGR8
16, // B5G6R5
64, // DXT1
32, // RGB10_A2
};
ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -85,8 +101,11 @@ struct SurfaceParams {
static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
switch (format) {
case Tegra::RenderTargetFormat::RGBA8_UNORM:
return PixelFormat::RGBA8;
return PixelFormat::ABGR8;
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
return PixelFormat::RGB10_A2;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
@@ -94,8 +113,9 @@ struct SurfaceParams {
static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
return PixelFormat::RGBA8;
return PixelFormat::ABGR8;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
@@ -104,10 +124,63 @@ struct SurfaceParams {
// TODO(Subv): Properly implement this
switch (format) {
case Tegra::Texture::TextureFormat::A8R8G8B8:
return PixelFormat::RGBA8;
return PixelFormat::ABGR8;
case Tegra::Texture::TextureFormat::B5G6R5:
return PixelFormat::B5G6R5;
case Tegra::Texture::TextureFormat::DXT1:
return PixelFormat::DXT1;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
/**
 * Converts a cached PixelFormat back into the guest texture format it corresponds to.
 * @param format Cached pixel format to convert.
 * @returns The matching Tegra texture format. Formats without a mapping are logged and then
 *          hit UNREACHABLE(), in the same way as the other format converters of this struct.
 */
static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) {
    // TODO(Subv): Properly implement this
    switch (format) {
    case PixelFormat::ABGR8:
        return Tegra::Texture::TextureFormat::A8R8G8B8;
    case PixelFormat::B5G6R5:
        return Tegra::Texture::TextureFormat::B5G6R5;
    case PixelFormat::DXT1:
        return Tegra::Texture::TextureFormat::DXT1;
    case PixelFormat::RGB10_A2:
        return Tegra::Texture::TextureFormat::RGB10_A2;
    default:
        // Report which format was hit before trapping, so the failure can be diagnosed.
        NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
        UNREACHABLE();
    }
}
/// Maps a guest texture component type onto the cache's ComponentType.
/// Anything other than UNORM is logged as unimplemented and hits UNREACHABLE().
static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
    // TODO(Subv): Implement more component types
    if (type == Tegra::Texture::ComponentType::UNORM) {
        return ComponentType::UNorm;
    }

    NGLOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type));
    UNREACHABLE();
}
/// Derives the cache's ComponentType from a render target format.
/// Both currently handled formats are UNORM; other formats are logged and hit UNREACHABLE().
static ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) {
    // TODO(Subv): Implement more render targets
    const bool is_unorm = format == Tegra::RenderTargetFormat::RGBA8_UNORM ||
                          format == Tegra::RenderTargetFormat::RGB10_A2_UNORM;
    if (is_unorm) {
        return ComponentType::UNorm;
    }

    NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
    UNREACHABLE();
}
/// Derives the cache's ComponentType from a framebuffer pixel format.
/// Only ABGR8 is handled (as UNorm); other formats are logged and hit UNREACHABLE().
static ComponentType ComponentTypeFromGPUPixelFormat(
    Tegra::FramebufferConfig::PixelFormat format) {
    if (format == Tegra::FramebufferConfig::PixelFormat::ABGR8) {
        return ComponentType::UNorm;
    }

    NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
    UNREACHABLE();
}
@@ -116,8 +189,7 @@ struct SurfaceParams {
SurfaceType a_type = GetFormatType(pixel_format_a);
SurfaceType b_type = GetFormatType(pixel_format_b);
if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) &&
(b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) {
return true;
}
@@ -133,12 +205,8 @@ struct SurfaceParams {
}
static SurfaceType GetFormatType(PixelFormat pixel_format) {
if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::RGBA8)) {
return SurfaceType::Color;
}
if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::DXT1)) {
return SurfaceType::Texture;
if (static_cast<size_t>(pixel_format) < MaxPixelFormat) {
return SurfaceType::ColorTexture;
}
// TODO(Subv): Implement the other formats
@@ -210,11 +278,13 @@ struct SurfaceParams {
u32 width = 0;
u32 height = 0;
u32 stride = 0;
u32 block_height = 0;
u16 res_scale = 1;
bool is_tiled = false;
PixelFormat pixel_format = PixelFormat::Invalid;
SurfaceType type = SurfaceType::Invalid;
ComponentType component_type = ComponentType::Invalid;
};
struct CachedSurface : SurfaceParams {

View File

@@ -17,6 +17,7 @@ using Tegra::Shader::Attribute;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
using Tegra::Shader::Sampler;
using Tegra::Shader::SubOp;
using Tegra::Shader::Uniform;
@@ -155,23 +156,27 @@ private:
/// Generates code representing an input attribute register.
std::string GetInputAttribute(Attribute::Index attribute) {
declr_input_attribute.insert(attribute);
switch (attribute) {
case Attribute::Index::Position:
return "position";
default:
const u32 index{static_cast<u32>(attribute) -
static_cast<u32>(Attribute::Index::Attribute_0)};
if (attribute >= Attribute::Index::Attribute_0) {
declr_input_attribute.insert(attribute);
return "input_attribute_" + std::to_string(index);
}
const u32 index{static_cast<u32>(attribute) -
static_cast<u32>(Attribute::Index::Attribute_0)};
if (attribute >= Attribute::Index::Attribute_0) {
return "input_attribute_" + std::to_string(index);
NGLOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index);
UNREACHABLE();
}
LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index);
UNREACHABLE();
}
/// Generates code representing an output attribute register.
std::string GetOutputAttribute(Attribute::Index attribute) {
switch (attribute) {
case Attribute::Index::Position:
return "gl_Position";
return "position";
default:
const u32 index{static_cast<u32>(attribute) -
static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -180,22 +185,42 @@ private:
return "output_attribute_" + std::to_string(index);
}
LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index);
NGLOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index);
UNREACHABLE();
}
}
/// Produces the textual operand for an instruction's immediate: the value returned by
/// instr.alu.GetImm20(), formatted with std::to_string.
static std::string GetImmediate(const Instruction& instr) {
    const auto immediate = instr.alu.GetImm20();
    return std::to_string(immediate);
}
/// Generates code representing a temporary (GPR) register.
std::string GetRegister(const Register& reg) {
return *declr_register.insert("register_" + std::to_string(reg)).first;
std::string GetRegister(const Register& reg, unsigned elem = 0) {
if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg < 4) {
// GPRs 0-3 are output color for the fragment shader
return std::string{"color."} + "rgba"[(reg + elem) & 3];
}
return *declr_register.insert("register_" + std::to_string(reg + elem)).first;
}
/// Generates code representing a uniform (C buffer) register.
std::string GetUniform(const Uniform& reg) {
declr_const_buffers[reg.index].MarkAsUsed(reg.index, reg.offset, stage);
declr_const_buffers[reg.index].MarkAsUsed(static_cast<unsigned>(reg.index),
static_cast<unsigned>(reg.offset), stage);
return 'c' + std::to_string(reg.index) + '[' + std::to_string(reg.offset) + ']';
}
/// Builds the GLSL expression for a texture sampler, of the form "tex[<n>]", where <n> is the
/// sampler's index relative to Sampler_0.
std::string GetSampler(const Sampler& sampler) const {
    // TODO(Subv): Support more than just texture sampler 0
    ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported");

    const auto first_sampler = static_cast<unsigned>(Sampler::Index::Sampler_0);
    const unsigned slot = static_cast<unsigned>(sampler.index.Value()) - first_sampler;

    std::string code{"tex["};
    code += std::to_string(slot);
    code += "]";
    return code;
}
/**
* Adds code that calls a subroutine.
* @param subroutine the subroutine to call.
@@ -217,12 +242,13 @@ private:
* @param value the code representing the value to assign.
*/
void SetDest(u64 elem, const std::string& reg, const std::string& value,
u64 dest_num_components, u64 value_num_components) {
u64 dest_num_components, u64 value_num_components, bool is_abs = false) {
std::string swizzle = ".";
swizzle += "xyzw"[elem];
std::string dest = reg + (dest_num_components != 1 ? swizzle : "");
std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : "");
src = is_abs ? "abs(" + src + ")" : src;
shader.AddLine(dest + " = " + src + ";");
}
@@ -240,8 +266,6 @@ private:
switch (OpCode::GetInfo(instr.opcode).type) {
case OpCode::Type::Arithmetic: {
ASSERT(!instr.alu.abs_d);
std::string dest = GetRegister(instr.gpr0);
std::string op_a = instr.alu.negate_a ? "-" : "";
op_a += GetRegister(instr.gpr8);
@@ -250,63 +274,109 @@ private:
}
std::string op_b = instr.alu.negate_b ? "-" : "";
if (instr.is_b_gpr) {
op_b += GetRegister(instr.gpr20);
if (instr.is_b_imm) {
op_b += GetImmediate(instr);
} else {
op_b += GetUniform(instr.uniform);
if (instr.is_b_gpr) {
op_b += GetRegister(instr.gpr20);
} else {
op_b += GetUniform(instr.uniform);
}
}
if (instr.alu.abs_b) {
op_b = "abs(" + op_b + ")";
}
switch (instr.opcode.EffectiveOpCode()) {
case OpCode::Id::FMUL_C:
case OpCode::Id::FMUL_R: {
SetDest(0, dest, op_a + " * " + op_b, 1, 1);
case OpCode::Id::FMUL_R:
case OpCode::Id::FMUL_IMM: {
SetDest(0, dest, op_a + " * " + op_b, 1, 1, instr.alu.abs_d);
break;
}
case OpCode::Id::FADD_C:
case OpCode::Id::FADD_R: {
SetDest(0, dest, op_a + " + " + op_b, 1, 1);
case OpCode::Id::FADD_R:
case OpCode::Id::FADD_IMM: {
SetDest(0, dest, op_a + " + " + op_b, 1, 1, instr.alu.abs_d);
break;
}
case OpCode::Id::MUFU: {
switch (instr.sub_op) {
case SubOp::Cos:
SetDest(0, dest, "cos(" + op_a + ")", 1, 1, instr.alu.abs_d);
break;
case SubOp::Sin:
SetDest(0, dest, "sin(" + op_a + ")", 1, 1, instr.alu.abs_d);
break;
case SubOp::Ex2:
SetDest(0, dest, "exp2(" + op_a + ")", 1, 1, instr.alu.abs_d);
break;
case SubOp::Lg2:
SetDest(0, dest, "log2(" + op_a + ")", 1, 1, instr.alu.abs_d);
break;
case SubOp::Rcp:
SetDest(0, dest, "1.0 / " + op_a, 1, 1, instr.alu.abs_d);
break;
case SubOp::Rsq:
SetDest(0, dest, "inversesqrt(" + op_a + ")", 1, 1, instr.alu.abs_d);
break;
case SubOp::Min:
SetDest(0, dest, "min(" + op_a + "," + op_b + ")", 1, 1, instr.alu.abs_d);
break;
default:
NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {}",
static_cast<unsigned>(instr.sub_op.Value()));
UNREACHABLE();
}
break;
}
default: {
LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
throw DecompileFail("Unhandled instruction");
break;
NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {} ({}): {}",
static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
OpCode::GetInfo(instr.opcode).name, instr.hex);
UNREACHABLE();
}
}
break;
}
case OpCode::Type::Ffma: {
ASSERT_MSG(!instr.ffma.negate_b, "untested");
ASSERT_MSG(!instr.ffma.negate_c, "untested");
std::string dest = GetRegister(instr.gpr0);
std::string op_a = GetRegister(instr.gpr8);
std::string op_b = instr.ffma.negate_b ? "-" : "";
op_b += GetUniform(instr.uniform);
std::string op_c = instr.ffma.negate_c ? "-" : "";
op_c += GetRegister(instr.gpr39);
switch (instr.opcode.EffectiveOpCode()) {
case OpCode::Id::FFMA_CR: {
SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1);
op_b += GetUniform(instr.uniform);
op_c += GetRegister(instr.gpr39);
break;
}
case OpCode::Id::FFMA_RR: {
op_b += GetRegister(instr.gpr20);
op_c += GetRegister(instr.gpr39);
break;
}
case OpCode::Id::FFMA_RC: {
op_b += GetRegister(instr.gpr39);
op_c += GetUniform(instr.uniform);
break;
}
case OpCode::Id::FFMA_IMM: {
op_b += GetImmediate(instr);
op_c += GetRegister(instr.gpr39);
break;
}
default: {
NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {} ({}): {}",
static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
OpCode::GetInfo(instr.opcode).name, instr.hex);
UNREACHABLE();
}
}
default: {
LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x",
static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
throw DecompileFail("Unhandled instruction");
break;
}
}
SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1);
break;
}
case OpCode::Type::Memory: {
@@ -315,22 +385,33 @@ private:
switch (instr.opcode.EffectiveOpCode()) {
case OpCode::Id::LD_A: {
ASSERT(instr.attribute.fmt20.size == 0);
ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4);
break;
}
case OpCode::Id::ST_A: {
ASSERT(instr.attribute.fmt20.size == 0);
ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1);
break;
}
default: {
LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x",
static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
throw DecompileFail("Unhandled instruction");
case OpCode::Id::TEXS: {
ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
const std::string op_a = GetRegister(instr.gpr8);
const std::string op_b = GetRegister(instr.gpr20);
const std::string sampler = GetSampler(instr.sampler);
const std::string coord = "vec2(" + op_a + ", " + op_b + ")";
const std::string texture = "texture(" + sampler + ", " + coord + ")";
for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
SetDest(elem, GetRegister(instr.gpr0, elem), texture, 1, 4);
}
break;
}
default: {
NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {} ({}): {}",
static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
OpCode::GetInfo(instr.opcode).name, instr.hex);
UNREACHABLE();
}
}
break;
}
@@ -342,14 +423,18 @@ private:
offset = PROGRAM_END - 1;
break;
}
default: {
LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
throw DecompileFail("Unhandled instruction");
case OpCode::Id::IPA: {
const auto& attribute = instr.attribute.fmt28;
std::string dest = GetRegister(instr.gpr0);
SetDest(attribute.element, dest, GetInputAttribute(attribute.index), 1, 4);
break;
}
default: {
NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {} ({}): {}",
static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
OpCode::GetInfo(instr.opcode).name, instr.hex);
UNREACHABLE();
}
}
break;
@@ -514,7 +599,7 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
GLSLGenerator generator(subroutines, program_code, main_offset, stage);
return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
} catch (const DecompileFail& exception) {
LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what());
NGLOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
}
return boost::none;
}

View File

@@ -27,10 +27,19 @@ out gl_PerVertex {
vec4 gl_Position;
};
out vec4 position;
layout (std140) uniform vs_config {
vec4 viewport_flip;
};
void main() {
exec_shader();
}
// Viewport can be flipped, which is unsupported by glViewport
position.xy *= viewport_flip.xy;
gl_Position = position;
}
)";
out += program.first;
return {out, program.second};
@@ -46,8 +55,13 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo
.get_value_or({});
out += R"(
in vec4 position;
out vec4 color;
layout (std140) uniform fs_config {
vec4 viewport_flip;
};
uniform sampler2D tex[32];
void main() {

View File

@@ -53,6 +53,12 @@ void SetShaderSamplerBindings(GLuint shader) {
} // namespace Impl
void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {}
/**
 * Refreshes the uniform data from the current Maxwell3D register state.
 * viewport_flip[0] / viewport_flip[1] are set to -1.0 when the first viewport transform's
 * scale_x / scale_y is negative, and to 1.0 otherwise.
 * @param shader_stage Not read by this function at the moment.
 */
void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
    // Reach the system instance through the accessor directly. The previous spelling,
    // `Core::System().GetInstance()`, constructed a throwaway Core::System object first.
    // NOTE(review): assumes Core::System::GetInstance is a static accessor - confirm.
    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;

    // TODO(bunnei): Support more than one viewport
    viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0 : 1.0;
    viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0 : 1.0;
}
} // namespace GLShader

View File

@@ -30,10 +30,9 @@ void SetShaderSamplerBindings(GLuint shader);
// Not following that rule will cause problems on some AMD drivers.
struct MaxwellUniformData {
void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
// TODO(Subv): Use this for something.
alignas(16) GLvec4 viewport_flip;
};
// static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is
// incorrect");
static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");

View File

@@ -31,7 +31,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_UNSIGNED_BYTE;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size=%s", attrib.SizeString().c_str());
NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
UNREACHABLE();
return {};
}
@@ -40,7 +40,21 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_FLOAT;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type=%s", attrib.TypeString().c_str());
NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
UNREACHABLE();
return {};
}
/// Translates a Maxwell index format into the corresponding OpenGL index type.
/// Unlisted values are logged, hit UNREACHABLE() and yield a value-initialized GLenum.
inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
    if (index_format == Maxwell::IndexFormat::UnsignedByte) {
        return GL_UNSIGNED_BYTE;
    }
    if (index_format == Maxwell::IndexFormat::UnsignedShort) {
        return GL_UNSIGNED_SHORT;
    }
    if (index_format == Maxwell::IndexFormat::UnsignedInt) {
        return GL_UNSIGNED_INT;
    }

    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
    UNREACHABLE();
    return {};
}
@@ -52,7 +66,7 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
case Maxwell::PrimitiveTopology::TriangleStrip:
return GL_TRIANGLE_STRIP;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented primitive topology=%d", topology);
NGLOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
UNREACHABLE();
return {};
}
@@ -64,18 +78,90 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode) {
case Tegra::Texture::TextureFilter::Nearest:
return GL_NEAREST;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode=%u",
static_cast<u32>(filter_mode));
NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode={}",
static_cast<u32>(filter_mode));
UNREACHABLE();
return {};
}
inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
switch (wrap_mode) {
case Tegra::Texture::WrapMode::Wrap:
return GL_REPEAT;
case Tegra::Texture::WrapMode::ClampToEdge:
return GL_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::ClampOGL:
// TODO(Subv): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
// GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to
// manually mix them. However the shader part of this is not yet implemented.
return GL_CLAMP_TO_BORDER;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode=%u", static_cast<u32>(wrap_mode));
NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}",
static_cast<u32>(wrap_mode));
UNREACHABLE();
return {};
}
/// Translates a Maxwell blend equation into the corresponding OpenGL blend equation.
/// Unlisted values are logged, hit UNREACHABLE() and yield a value-initialized GLenum.
inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
    using Equation = Maxwell::Blend::Equation;

    if (equation == Equation::Add) {
        return GL_FUNC_ADD;
    }
    if (equation == Equation::Subtract) {
        return GL_FUNC_SUBTRACT;
    }
    if (equation == Equation::ReverseSubtract) {
        return GL_FUNC_REVERSE_SUBTRACT;
    }
    if (equation == Equation::Min) {
        return GL_MIN;
    }
    if (equation == Equation::Max) {
        return GL_MAX;
    }

    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
    UNREACHABLE();
    return {};
}
/// Translates a Maxwell blend factor into the corresponding OpenGL blend factor.
/// Every factor handled here maps one-to-one onto the GL constant of the same meaning.
/// A value that matches no case is logged, hits UNREACHABLE() and then yields a
/// value-initialized GLenum.
inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
switch (factor) {
case Maxwell::Blend::Factor::Zero:
return GL_ZERO;
case Maxwell::Blend::Factor::One:
return GL_ONE;
case Maxwell::Blend::Factor::SourceColor:
return GL_SRC_COLOR;
case Maxwell::Blend::Factor::OneMinusSourceColor:
return GL_ONE_MINUS_SRC_COLOR;
case Maxwell::Blend::Factor::SourceAlpha:
return GL_SRC_ALPHA;
case Maxwell::Blend::Factor::OneMinusSourceAlpha:
return GL_ONE_MINUS_SRC_ALPHA;
case Maxwell::Blend::Factor::DestAlpha:
return GL_DST_ALPHA;
case Maxwell::Blend::Factor::OneMinusDestAlpha:
return GL_ONE_MINUS_DST_ALPHA;
case Maxwell::Blend::Factor::DestColor:
return GL_DST_COLOR;
case Maxwell::Blend::Factor::OneMinusDestColor:
return GL_ONE_MINUS_DST_COLOR;
case Maxwell::Blend::Factor::SourceAlphaSaturate:
return GL_SRC_ALPHA_SATURATE;
// Second-source ("Source1") factors.
case Maxwell::Blend::Factor::Source1Color:
return GL_SRC1_COLOR;
case Maxwell::Blend::Factor::OneMinusSource1Color:
return GL_ONE_MINUS_SRC1_COLOR;
case Maxwell::Blend::Factor::Source1Alpha:
return GL_SRC1_ALPHA;
case Maxwell::Blend::Factor::OneMinusSource1Alpha:
return GL_ONE_MINUS_SRC1_ALPHA;
// Constant blend color factors.
case Maxwell::Blend::Factor::ConstantColor:
return GL_CONSTANT_COLOR;
case Maxwell::Blend::Factor::OneMinusConstantColor:
return GL_ONE_MINUS_CONSTANT_COLOR;
case Maxwell::Blend::Factor::ConstantAlpha:
return GL_CONSTANT_ALPHA;
case Maxwell::Blend::Factor::OneMinusConstantAlpha:
return GL_ONE_MINUS_CONSTANT_ALPHA;
}
// Only reached when no case above matched.
NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
UNREACHABLE();
return {};
}

View File

@@ -50,29 +50,31 @@ u32 BytesPerPixel(TextureFormat format) {
return 8;
case TextureFormat::A8R8G8B8:
return 4;
case TextureFormat::B5G6R5:
return 2;
default:
UNIMPLEMENTED_MSG("Format not implemented");
break;
}
}
std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) {
std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
u32 block_height) {
u8* data = Memory::GetPointer(address);
u32 bytes_per_pixel = BytesPerPixel(format);
static constexpr u32 DefaultBlockHeight = 16;
std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
switch (format) {
case TextureFormat::DXT1:
// In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values.
CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, DefaultBlockHeight);
unswizzled_data.data(), true, block_height);
break;
case TextureFormat::A8R8G8B8:
case TextureFormat::B5G6R5:
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, DefaultBlockHeight);
unswizzled_data.data(), true, block_height);
break;
default:
UNIMPLEMENTED_MSG("Format not implemented");
@@ -90,6 +92,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
switch (format) {
case TextureFormat::DXT1:
case TextureFormat::A8R8G8B8:
case TextureFormat::B5G6R5:
// TODO(Subv): For the time being just forward the same data without any decoding.
rgba_data = texture_data;
break;

View File

@@ -14,7 +14,8 @@ namespace Texture {
/**
* Unswizzles a swizzled texture without changing its format.
*/
std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height);
std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
u32 block_height = TICEntry::DefaultBlockHeight);
/**
* Decodes an unswizzled texture into an A8R8G8B8 texture.

View File

@@ -4,6 +4,7 @@
#pragma once
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -13,8 +14,12 @@ namespace Tegra {
namespace Texture {
enum class TextureFormat : u32 {
A8R8G8B8 = 8,
A8R8G8B8 = 0x8,
B5G6R5 = 0x15,
DXT1 = 0x24,
DXT23 = 0x25,
DXT45 = 0x26,
RGB10_A2 = 0xD1,
};
enum class TextureType : u32 {
@@ -55,6 +60,8 @@ union TextureHandle {
static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
struct TICEntry {
static constexpr u32 DefaultBlockHeight = 16;
union {
u32 raw;
BitField<0, 7, TextureFormat> format;
@@ -68,7 +75,12 @@ struct TICEntry {
BitField<0, 16, u32> address_high;
BitField<21, 3, TICHeaderVersion> header_version;
};
INSERT_PADDING_BYTES(4);
union {
BitField<3, 3, u32> block_height;
// High 16 bits of the pitch value
BitField<0, 16, u32> pitch_high;
};
union {
BitField<0, 16, u32> width_minus_1;
BitField<23, 4, TextureType> texture_type;
@@ -80,6 +92,13 @@ struct TICEntry {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
}
/// Returns the texture pitch rebuilt from the stored pitch_high field.
/// Asserts that the header version is one of the pitch layouts.
u32 Pitch() const {
    ASSERT(header_version == TICHeaderVersion::Pitch ||
           header_version == TICHeaderVersion::PitchColorKey);

    // The pitch is a 21-bit, 32B-aligned value: only its upper 16 bits are stored, so the
    // five low bits are restored as zero by the shift.
    constexpr u32 PitchAlignmentShift = 5;
    const u32 stored_bits = pitch_high;
    return stored_bits << PitchAlignmentShift;
}
u32 Width() const {
return width_minus_1 + 1;
}
@@ -88,6 +107,13 @@ struct TICEntry {
return height_minus_1 + 1;
}
/// Returns the block height decoded from the block_height field.
/// Asserts that the header version is one of the block linear layouts.
u32 BlockHeight() const {
    ASSERT(header_version == TICHeaderVersion::BlockLinear ||
           header_version == TICHeaderVersion::BlockLinearColorKey);

    // The field holds log2 of the block height, so decode it as a power of two.
    const u32 log2_block_height = block_height;
    return 1u << log2_block_height;
}
bool IsTiled() const {
return header_version == TICHeaderVersion::BlockLinear ||
header_version == TICHeaderVersion::BlockLinearColorKey;