Compare commits

..

27 Commits

Author SHA1 Message Date
Subv
c5284efd4f Rasterizer: Implemented instanced rendering.
We keep track of the current instance and update a uniform in the shaders to let them know which instance they are.

Instanced vertex arrays are not yet implemented.
2018-08-14 22:25:07 -05:00
bunnei
3aad82b1a3 Merge pull request #1069 from bunnei/vtx-sz
maxwell_to_gl: Properly handle UnsignedInt/SignedInt sizes.
2018-08-14 23:14:44 -04:00
bunnei
2a42dea568 Merge pull request #1070 from bunnei/cbuf-sz
gl_rasterizer: Fix upload size for constant buffers.
2018-08-14 23:14:24 -04:00
bunnei
c8cd1785e6 Merge pull request #1071 from bunnei/fix-ldc
gl_shader_decompiler: Several fixes for indirect constant buffer loads.
2018-08-14 23:14:09 -04:00
bunnei
991eb4824c Merge pull request #1068 from bunnei/g8r8s
gl_rasterizer_cache: Implement G8R8S format.
2018-08-14 23:13:43 -04:00
bunnei
301baaa942 Merge pull request #1067 from lioncash/init
emu_window: Ensure WindowConfig members are always initialized
2018-08-14 22:43:32 -04:00
bunnei
3db1b8e0cd Merge pull request #1073 from lioncash/3ds
loader: Remove address mapping remnants from citra
2018-08-14 22:43:04 -04:00
bunnei
8f9c49f7ee Merge pull request #1072 from lioncash/svc
kernel/svc: Log svcBreak parameters
2018-08-14 22:42:44 -04:00
bunnei
f009ed63f3 Merge pull request #1063 from lioncash/inline
common/xbyak_abi: Mark defined functions in header as inline
2018-08-14 22:40:23 -04:00
bunnei
18dfa99030 Merge pull request #1074 from greggameplayer/Z16_UNORM
Implement Z16 in PixelFormatFromTextureFormat function
2018-08-14 22:39:09 -04:00
greggameplayer
6eda9ebbdb Implement Z16_UNORM in PixelFormatFromTextureFormat function
Required by Zelda Breath Of The Wild
2018-08-15 04:14:15 +02:00
bunnei
ad7815a28d Merge pull request #1054 from zhaowenlan1779/misc-fixup
common/misc: use windows.h
2018-08-14 21:47:28 -04:00
bunnei
409d2e07c2 Merge pull request #1056 from lioncash/mm
mm_u: Move interface class into the cpp file
2018-08-14 21:47:07 -04:00
bunnei
8dc4407586 Merge pull request #1066 from lioncash/aarch64
CMakeLists: Add architecture detection for AArch64
2018-08-14 21:46:53 -04:00
Lioncash
96c0b81a51 loader: Remove address mapping remnants from citra
These mappings are leftovers from citra and don't apply to the Switch.
2018-08-14 21:37:03 -04:00
Lioncash
25d71454d1 kernel/svc: Log svcBreak parameters
Given that all is lost if we reach this point, we should be logging the
break reason code and associated information to distinguish between the
causes.
2018-08-14 20:54:05 -04:00
bunnei
5e66a24423 gl_shader_decompiler: Several fixes for indirect constant buffer loads. 2018-08-14 20:47:50 -04:00
bunnei
290439a6a5 gl_rasterizer: Fix upload size for constant buffers. 2018-08-14 20:44:19 -04:00
bunnei
dc876fd63a maxwell_to_gl: Properly handle UnsignedInt/SignedInt sizes. 2018-08-14 20:43:02 -04:00
bunnei
d8fd3ef4fe gl_rasterizer_cache: Implement G8R8S format.
- Used by Super Mario Odyssey.
2018-08-14 20:41:49 -04:00
bunnei
1c31cbad72 Merge pull request #1062 from lioncash/unused
common: Remove unused old breakpoint source files
2018-08-14 20:26:56 -04:00
Lioncash
2e715ef70d emu_window: Ensure WindowConfig members are always initialized
Previously we weren't always initializing all members of the struct.
Prevents potentially wonky behavior from occurring.
2018-08-14 19:36:43 -04:00
Lioncash
319dbc5843 CMakeLists: Add architecture detection for AArch64
We already have an equivalent in place for the 32-bit ARM architecture, so we
should also have one for the newer 64-bit ARM architecture as well.
2018-08-14 19:06:55 -04:00
Lioncash
11895d54af common: Remove unused old breakpoint source files
These currently aren't used and contain commented out source code that
corresponds to Dolphin's JIT. Given our CPU code is organized quite
differently, we shouldn't be keeping this around (at the moment it just
adds to compile times marginally).
2018-08-14 18:14:01 -04:00
Lioncash
b6c47b578f mm_u: Forward all old variants of functions to the new ones
Ensures both variants go through the same interface, and while we're at
it, add Finalize to provide the inverse of Initialize for consistency.
2018-08-13 18:59:10 -04:00
Lioncash
9d09d92c56 mm_u: Move implementation class into the cpp file
Now if changes are ever made to the behavior of the class, it doesn't
involve rebuilding everything that includes the mm_u header.
2018-08-13 18:59:07 -04:00
Zhu PengFei
59d18ef55b common/misc: use windows.h
linux-mingw does not really like this.
2018-08-14 04:28:24 +08:00
25 changed files with 190 additions and 273 deletions

View File

@@ -66,10 +66,12 @@ if (NOT ENABLE_GENERIC)
detect_architecture("_M_AMD64" x86_64)
detect_architecture("_M_IX86" x86)
detect_architecture("_M_ARM" ARM)
detect_architecture("_M_ARM64" ARM64)
else()
detect_architecture("__x86_64__" x86_64)
detect_architecture("__i386__" x86)
detect_architecture("__arm__" ARM)
detect_architecture("__aarch64__" ARM64)
endif()
endif()

View File

@@ -29,8 +29,6 @@ add_library(common STATIC
assert.h
bit_field.h
bit_set.h
break_points.cpp
break_points.h
cityhash.cpp
cityhash.h
color.h

View File

@@ -1,90 +0,0 @@
// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <sstream>
#include "common/break_points.h"
// Reports whether any stored breakpoint, temporary or not, sits at iAddress.
bool BreakPoints::IsAddressBreakPoint(u32 iAddress) const {
    return std::any_of(m_BreakPoints.begin(), m_BreakPoints.end(),
                       [iAddress](const TBreakPoint& entry) {
                           return entry.iAddress == iAddress;
                       });
}
// Reports whether a breakpoint flagged as temporary sits at iAddress.
// A non-temporary breakpoint at the same address does not count.
bool BreakPoints::IsTempBreakPoint(u32 iAddress) const {
    return std::any_of(m_BreakPoints.begin(), m_BreakPoints.end(),
                       [iAddress](const TBreakPoint& entry) {
                           if (!entry.bTemporary) {
                               return false;
                           }
                           return entry.iAddress == iAddress;
                       });
}
// Serializes the non-temporary breakpoints, one string per breakpoint.
// Each string is the address in hexadecimal followed by a space, with a
// trailing "n" appended when the breakpoint's bOn flag is set.
BreakPoints::TBreakPointsStr BreakPoints::GetStrings() const {
    TBreakPointsStr serialized;
    for (const auto& entry : m_BreakPoints) {
        // Temporary breakpoints are never written out.
        if (entry.bTemporary) {
            continue;
        }

        std::stringstream line;
        line << std::hex << entry.iAddress << " ";
        if (entry.bOn) {
            line << "n";
        }
        serialized.push_back(line.str());
    }
    return serialized;
}
// Adds one non-temporary breakpoint per entry of bps.
//
// Each entry is expected in the format written by GetStrings(): a hexadecimal
// address, optionally followed by an "n" marking the breakpoint as on.
// Entries whose address cannot be parsed are skipped. Previously an empty or
// whitespace-only entry left the address unset before it was read by Add(),
// and other malformed entries registered a breakpoint at a bogus address.
void BreakPoints::AddFromStrings(const TBreakPointsStr& bps) {
    for (const auto& bps_item : bps) {
        TBreakPoint bp;

        std::istringstream parser(bps_item);
        if (!(parser >> std::hex >> bp.iAddress)) {
            // Nothing usable in this entry; do not invent a breakpoint for it.
            continue;
        }

        // 'n' is not a hexadecimal digit, so it can only be the "on" marker.
        bp.bOn = bps_item.find('n') != std::string::npos;
        bp.bTemporary = false;
        Add(bp);
    }
}
// Appends bp to the list unless a breakpoint already exists at bp.iAddress,
// in which case the call is a no-op.
void BreakPoints::Add(const TBreakPoint& bp) {
    const bool already_present = IsAddressBreakPoint(bp.iAddress);
    if (already_present) {
        return;
    }

    m_BreakPoints.push_back(bp);
    // Disabled JIT hook, kept for reference:
    // if (jit)
    //     jit->GetBlockCache()->InvalidateICache(bp.iAddress, 4);
}
// Creates a breakpoint at em_address with its bOn flag set and appends it to
// the list. `temp` becomes the breakpoint's bTemporary flag. Does nothing when
// a breakpoint already exists at that address.
void BreakPoints::Add(u32 em_address, bool temp) {
    if (IsAddressBreakPoint(em_address)) {
        // Only new addresses are added.
        return;
    }

    TBreakPoint fresh;
    fresh.iAddress = em_address;
    fresh.bTemporary = temp;
    fresh.bOn = true;
    m_BreakPoints.push_back(fresh);
    // Disabled JIT hook, kept for reference:
    // if (jit)
    //     jit->GetBlockCache()->InvalidateICache(em_address, 4);
}
// Erases the first breakpoint found at em_address. Does nothing when no
// breakpoint has that address.
void BreakPoints::Remove(u32 em_address) {
    const auto match = std::find_if(m_BreakPoints.begin(), m_BreakPoints.end(),
                                    [em_address](const TBreakPoint& entry) {
                                        return entry.iAddress == em_address;
                                    });
    if (match == m_BreakPoints.end()) {
        return;
    }
    m_BreakPoints.erase(match);
}
// Removes every breakpoint, temporary or not, from the list.
// The commented-out block below is disabled code that would have invalidated
// the instruction cache of a `jit` object for each breakpoint address; it is
// not compiled.
void BreakPoints::Clear() {
// if (jit)
//{
// std::for_each(m_BreakPoints.begin(), m_BreakPoints.end(),
// [](const TBreakPoint& bp)
// {
// jit->GetBlockCache()->InvalidateICache(bp.iAddress, 4);
// }
// );
//}
m_BreakPoints.clear();
}

View File

@@ -1,49 +0,0 @@
// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <vector>
#include "common/common_types.h"
class DebugInterface;
// A single code breakpoint.
// Every member has a default so that a default-constructed TBreakPoint never
// carries indeterminate values.
struct TBreakPoint {
    u32 iAddress = 0;        // Address the breakpoint is compared against
    bool bOn = false;        // On/off flag; serialized as a trailing "n" by GetStrings()
    bool bTemporary = false; // Temporary breakpoints are skipped by GetStrings()
};
// Code breakpoints.
class BreakPoints {
public:
typedef std::vector<TBreakPoint> TBreakPoints;
typedef std::vector<std::string> TBreakPointsStr;
const TBreakPoints& GetBreakPoints() {
return m_BreakPoints;
}
TBreakPointsStr GetStrings() const;
void AddFromStrings(const TBreakPointsStr& bps);
// is address breakpoint
bool IsAddressBreakPoint(u32 iAddress) const;
bool IsTempBreakPoint(u32 iAddress) const;
// Add BreakPoint
void Add(u32 em_address, bool temp = false);
void Add(const TBreakPoint& bp);
// Remove Breakpoint
void Remove(u32 iAddress);
void Clear();
void DeleteByAddress(u32 Address);
private:
TBreakPoints m_BreakPoints;
u32 m_iBreakOnCount;
};

View File

@@ -4,7 +4,7 @@
#include <cstddef>
#ifdef _WIN32
#include <Windows.h>
#include <windows.h>
#else
#include <cerrno>
#include <cstring>

View File

@@ -34,9 +34,9 @@ class EmuWindow {
public:
/// Data structure to store emuwindow configuration
struct WindowConfig {
bool fullscreen;
int res_width;
int res_height;
bool fullscreen = false;
int res_width = 0;
int res_height = 0;
std::pair<unsigned, unsigned> min_client_area_size;
};

View File

@@ -250,8 +250,11 @@ static ResultCode ArbitrateUnlock(VAddr mutex_addr) {
}
/// Break program execution
static void Break(u64 unk_0, u64 unk_1, u64 unk_2) {
LOG_CRITICAL(Debug_Emulated, "Emulated program broke execution!");
static void Break(u64 reason, u64 info1, u64 info2) {
LOG_CRITICAL(
Debug_Emulated,
"Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}",
reason, info1, info2);
ASSERT(false);
}

View File

@@ -9,42 +9,63 @@
namespace Service::MM {
class MM_U final : public ServiceFramework<MM_U> {
public:
explicit MM_U() : ServiceFramework{"mm:u"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, &MM_U::Initialize, "InitializeOld"},
{1, &MM_U::Finalize, "FinalizeOld"},
{2, &MM_U::SetAndWait, "SetAndWaitOld"},
{3, &MM_U::Get, "GetOld"},
{4, &MM_U::Initialize, "Initialize"},
{5, &MM_U::Finalize, "Finalize"},
{6, &MM_U::SetAndWait, "SetAndWait"},
{7, &MM_U::Get, "Get"},
};
// clang-format on
RegisterHandlers(functions);
}
private:
void Initialize(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_MM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Finalize(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_MM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void SetAndWait(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
min = rp.Pop<u32>();
max = rp.Pop<u32>();
current = min;
LOG_WARNING(Service_MM, "(STUBBED) called, min=0x{:X}, max=0x{:X}", min, max);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Get(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_MM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(current);
}
u32 min{0};
u32 max{0};
u32 current{0};
};
void InstallInterfaces(SM::ServiceManager& service_manager) {
std::make_shared<MM_U>()->InstallAsService(service_manager);
}
void MM_U::Initialize(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_MM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void MM_U::SetAndWait(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
min = rp.Pop<u32>();
max = rp.Pop<u32>();
current = min;
LOG_WARNING(Service_MM, "(STUBBED) called, min=0x{:X}, max=0x{:X}", min, max);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void MM_U::Get(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_MM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(current);
}
MM_U::MM_U() : ServiceFramework("mm:u") {
static const FunctionInfo functions[] = {
{0, nullptr, "InitializeOld"}, {1, nullptr, "FinalizeOld"},
{2, nullptr, "SetAndWaitOld"}, {3, nullptr, "GetOld"},
{4, &MM_U::Initialize, "Initialize"}, {5, nullptr, "Finalize"},
{6, &MM_U::SetAndWait, "SetAndWait"}, {7, &MM_U::Get, "Get"},
};
RegisterHandlers(functions);
}
} // namespace Service::MM

View File

@@ -8,21 +8,6 @@
namespace Service::MM {
class MM_U final : public ServiceFramework<MM_U> {
public:
MM_U();
~MM_U() = default;
private:
void Initialize(Kernel::HLERequestContext& ctx);
void SetAndWait(Kernel::HLERequestContext& ctx);
void Get(Kernel::HLERequestContext& ctx);
u32 min{0};
u32 max{0};
u32 current{0};
};
/// Registers all MM services with the specified service manager.
void InstallInterfaces(SM::ServiceManager& service_manager);

View File

@@ -118,7 +118,6 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(
process->program_id = metadata.GetTitleID();
process->svc_access_mask.set();
process->address_mappings = default_address_mappings;
process->resource_limit =
Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION);
process->Run(Memory::PROCESS_IMAGE_VADDR, metadata.GetMainThreadPriority(),

View File

@@ -398,7 +398,6 @@ ResultStatus AppLoader_ELF::Load(Kernel::SharedPtr<Kernel::Process>& process) {
process->LoadModule(codeset, codeset->entrypoint);
process->svc_access_mask.set();
process->address_mappings = default_address_mappings;
// Attach the default resource limit (APPLICATION) to the process
process->resource_limit =

View File

@@ -17,12 +17,6 @@
namespace Loader {
const std::initializer_list<Kernel::AddressMapping> default_address_mappings = {
{0x1FF50000, 0x8000, true}, // part of DSP RAM
{0x1FF70000, 0x8000, true}, // part of DSP RAM
{0x1F000000, 0x600000, false}, // entire VRAM
};
FileType IdentifyFile(FileSys::VirtualFile file) {
FileType type;

View File

@@ -5,7 +5,6 @@
#pragma once
#include <algorithm>
#include <initializer_list>
#include <memory>
#include <string>
#include <utility>
@@ -207,12 +206,6 @@ protected:
bool is_loaded = false;
};
/**
* Common address mappings found in most games, used for binary formats that don't have this
* information.
*/
extern const std::initializer_list<Kernel::AddressMapping> default_address_mappings;
/**
* Identifies a bootable file and return a suitable loader
* @param file The bootable file

View File

@@ -186,7 +186,6 @@ ResultStatus AppLoader_NRO::Load(Kernel::SharedPtr<Kernel::Process>& process) {
}
process->svc_access_mask.set();
process->address_mappings = default_address_mappings;
process->resource_limit =
Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION);
process->Run(base_addr, THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);

View File

@@ -152,7 +152,6 @@ ResultStatus AppLoader_NSO::Load(Kernel::SharedPtr<Kernel::Process>& process) {
LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", file->GetName(), Memory::PROCESS_IMAGE_VADDR);
process->svc_access_mask.set();
process->address_mappings = default_address_mappings;
process->resource_limit =
Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION);
process->Run(Memory::PROCESS_IMAGE_VADDR, THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);

View File

@@ -222,6 +222,18 @@ void Maxwell3D::DrawArrays() {
debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
}
// Both instance configuration registers can not be set at the same time.
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
if (regs.draw.instance_next) {
// Increment the current instance *before* drawing.
state.current_instance += 1;
} else if (!regs.draw.instance_cont) {
// Reset the current instance to 0.
state.current_instance = 0;
}
const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
rasterizer.AccelerateDrawBatch(is_indexed);

View File

@@ -638,6 +638,8 @@ public:
union {
u32 vertex_begin_gl;
BitField<0, 16, PrimitiveTopology> topology;
BitField<26, 1, u32> instance_next;
BitField<27, 1, u32> instance_cont;
};
} draw;
@@ -830,6 +832,7 @@ public:
};
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
};
State state{};

View File

@@ -124,7 +124,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset,
vertex_array.stride);
ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented");
ASSERT_MSG(vertex_array.divisor == 0, "Instanced vertex arrays are not supported");
}
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
@@ -648,11 +648,11 @@ std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(
if (used_buffer.IsIndirect()) {
// Buffer is accessed indirectly, so upload the entire thing
size = buffer.size * sizeof(float);
size = buffer.size;
if (size > MaxConstbufferSize) {
LOG_ERROR(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size,
MaxConstbufferSize);
LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size,
MaxConstbufferSize);
size = MaxConstbufferSize;
}
} else {

View File

@@ -119,7 +119,8 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // BC7U
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8U
{GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // G8R8S
{GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
{GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F
{GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F
@@ -260,7 +261,8 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
MortonCopy<true, PixelFormat::DXN2SNORM>,
MortonCopy<true, PixelFormat::BC7U>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
MortonCopy<true, PixelFormat::G8R8>,
MortonCopy<true, PixelFormat::G8R8U>,
MortonCopy<true, PixelFormat::G8R8S>,
MortonCopy<true, PixelFormat::BGRA8>,
MortonCopy<true, PixelFormat::RGBA32F>,
MortonCopy<true, PixelFormat::RG32F>,
@@ -315,7 +317,8 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
nullptr,
nullptr,
nullptr,
MortonCopy<false, PixelFormat::G8R8>,
MortonCopy<false, PixelFormat::G8R8U>,
MortonCopy<false, PixelFormat::G8R8S>,
MortonCopy<false, PixelFormat::BGRA8>,
MortonCopy<false, PixelFormat::RGBA32F>,
MortonCopy<false, PixelFormat::RG32F>,
@@ -461,7 +464,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
}
static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8)};
const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
for (size_t y = 0; y < height; ++y) {
for (size_t x = 0; x < width; ++x) {
const size_t offset{bpp * (y * width + x)};
@@ -493,7 +496,8 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
ConvertS8Z24ToZ24S8(data, width, height);
break;
case PixelFormat::G8R8:
case PixelFormat::G8R8U:
case PixelFormat::G8R8S:
// Convert the G8R8 color format to R8G8, as OpenGL does not support G8R8.
ConvertG8R8ToR8G8(data, width, height);
break;

View File

@@ -43,36 +43,37 @@ struct SurfaceParams {
DXN2SNORM = 17,
BC7U = 18,
ASTC_2D_4X4 = 19,
G8R8 = 20,
BGRA8 = 21,
RGBA32F = 22,
RG32F = 23,
R32F = 24,
R16F = 25,
R16UNORM = 26,
R16S = 27,
R16UI = 28,
R16I = 29,
RG16 = 30,
RG16F = 31,
RG16UI = 32,
RG16I = 33,
RG16S = 34,
RGB32F = 35,
SRGBA8 = 36,
RG8U = 37,
RG8S = 38,
RG32UI = 39,
R32UI = 40,
G8R8U = 20,
G8R8S = 21,
BGRA8 = 22,
RGBA32F = 23,
RG32F = 24,
R32F = 25,
R16F = 26,
R16UNORM = 27,
R16S = 28,
R16UI = 29,
R16I = 30,
RG16 = 31,
RG16F = 32,
RG16UI = 33,
RG16I = 34,
RG16S = 35,
RGB32F = 36,
SRGBA8 = 37,
RG8U = 38,
RG8S = 39,
RG32UI = 40,
R32UI = 41,
MaxColorFormat,
// DepthStencil formats
Z24S8 = 41,
S8Z24 = 42,
Z32F = 43,
Z16 = 44,
Z32FS8 = 45,
Z24S8 = 42,
S8Z24 = 43,
Z32F = 44,
Z16 = 45,
Z32FS8 = 46,
MaxDepthStencilFormat,
@@ -130,7 +131,8 @@ struct SurfaceParams {
4, // DXN2SNORM
4, // BC7U
4, // ASTC_2D_4X4
1, // G8R8
1, // G8R8U
1, // G8R8S
1, // BGRA8
1, // RGBA32F
1, // RG32F
@@ -187,7 +189,8 @@ struct SurfaceParams {
128, // DXN2SNORM
128, // BC7U
32, // ASTC_2D_4X4
16, // G8R8
16, // G8R8U
16, // G8R8S
32, // BGRA8
128, // RGBA32F
64, // RG32F
@@ -341,7 +344,15 @@ struct SurfaceParams {
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::G8R8:
return PixelFormat::G8R8;
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::G8R8U;
case Tegra::Texture::ComponentType::SNORM:
return PixelFormat::G8R8S;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
return PixelFormat::RGBA16F;
case Tegra::Texture::TextureFormat::BF10GF11RF11:
@@ -396,6 +407,8 @@ struct SurfaceParams {
UNREACHABLE();
case Tegra::Texture::TextureFormat::ZF32:
return PixelFormat::Z32F;
case Tegra::Texture::TextureFormat::Z16:
return PixelFormat::Z16;
case Tegra::Texture::TextureFormat::Z24S8:
return PixelFormat::Z24S8;
case Tegra::Texture::TextureFormat::DXT1:

View File

@@ -383,15 +383,13 @@ public:
}
}
std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg,
std::string GetUniformIndirect(u64 cbuf_index, s64 offset, const std::string& index_str,
GLSLRegister::Type type) {
declr_const_buffers[index].MarkAsUsedIndirect(index, stage);
declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage);
std::string final_offset = "((floatBitsToInt(" + GetRegister(index_reg, 0) + ") + " +
std::to_string(offset) + ") / 4)";
std::string value =
'c' + std::to_string(index) + '[' + final_offset + " / 4][" + final_offset + " % 4]";
std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
final_offset + " % 4]";
if (type == GLSLRegister::Type::Float) {
return value;
@@ -540,7 +538,7 @@ private:
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
// shader.
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))";
return "vec4(0, 0, uintBitsToFloat(instance_id.x), uintBitsToFloat(gl_VertexID))";
default:
const u32 index{static_cast<u32>(attribute) -
static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -1355,11 +1353,16 @@ private:
case OpCode::Id::LD_C: {
ASSERT_MSG(instr.ld_c.unknown == 0, "Unimplemented");
// Add an extra scope and declare the index register inside to prevent
// overwriting it in case it is used as an output of the LD instruction.
shader.AddLine("{");
++shader.scope;
shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
" / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);");
std::string op_a =
regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, instr.gpr8,
GLSLRegister::Type::Float);
std::string op_b =
regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, instr.gpr8,
regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index",
GLSLRegister::Type::Float);
switch (instr.ld_c.type.Value()) {
@@ -1367,16 +1370,22 @@ private:
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
break;
case Tegra::Shader::UniformType::Double:
case Tegra::Shader::UniformType::Double: {
std::string op_b =
regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4,
"index", GLSLRegister::Type::Float);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1);
break;
}
default:
LOG_CRITICAL(HW_GPU, "Unhandled type: {}",
static_cast<unsigned>(instr.ld_c.type.Value()));
UNREACHABLE();
}
--shader.scope;
shader.AddLine("}");
break;
}
case OpCode::Id::ST_A: {

View File

@@ -38,6 +38,7 @@ out vec4 position;
layout (std140) uniform vs_config {
vec4 viewport_flip;
uvec4 instance_id;
};
void main() {
@@ -90,6 +91,7 @@ out vec4 color;
layout (std140) uniform fs_config {
vec4 viewport_flip;
uvec4 instance_id;
};
void main() {

View File

@@ -37,11 +37,16 @@ void SetShaderUniformBlockBindings(GLuint shader) {
} // namespace Impl
void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
const auto& state = gpu.state;
// TODO(bunnei): Support more than one viewport
viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
// We only assign the instance to the first component of the vector, the rest is just padding.
instance_id[0] = state.current_instance;
}
} // namespace GLShader

View File

@@ -24,14 +24,15 @@ void SetShaderUniformBlockBindings(GLuint shader);
} // namespace Impl
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at
// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
// Not following that rule will cause problems on some AMD drivers.
struct MaxwellUniformData {
void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
alignas(16) GLvec4 viewport_flip;
alignas(16) GLuvec4 instance_id;
};
static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");

View File

@@ -24,16 +24,25 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
case Maxwell::VertexAttribute::Type::UnsignedInt:
case Maxwell::VertexAttribute::Type::UnsignedNorm: {
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
case Maxwell::VertexAttribute::Size::Size_8_8_8:
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_UNSIGNED_BYTE;
case Maxwell::VertexAttribute::Size::Size_16:
case Maxwell::VertexAttribute::Size::Size_16_16:
case Maxwell::VertexAttribute::Size::Size_16_16_16:
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return GL_UNSIGNED_SHORT;
case Maxwell::VertexAttribute::Size::Size_32:
case Maxwell::VertexAttribute::Size::Size_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_UNSIGNED_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_UNSIGNED_INT_2_10_10_10_REV;
}
@@ -43,16 +52,25 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return {};
}
case Maxwell::VertexAttribute::Type::SignedInt:
case Maxwell::VertexAttribute::Type::SignedNorm: {
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_32_32_32:
return GL_INT;
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
case Maxwell::VertexAttribute::Size::Size_8_8_8:
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_BYTE;
case Maxwell::VertexAttribute::Size::Size_16:
case Maxwell::VertexAttribute::Size::Size_16_16:
case Maxwell::VertexAttribute::Size::Size_16_16_16:
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return GL_SHORT;
case Maxwell::VertexAttribute::Size::Size_32:
case Maxwell::VertexAttribute::Size::Size_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_INT_2_10_10_10_REV;
}
@@ -62,9 +80,6 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return {};
}
case Maxwell::VertexAttribute::Type::UnsignedInt:
return GL_UNSIGNED_INT;
case Maxwell::VertexAttribute::Type::Float:
return GL_FLOAT;
}