Compare commits

..

29 Commits

Author SHA1 Message Date
Subv
aa586fa268 GPU: Store uploaded GPU macros and keep track of the number of method parameters. 2018-03-18 11:51:46 -05:00
Subv
7ac8657432 GPU: Macros are specific to the Maxwell3D engine, so handle them internally. 2018-03-18 11:51:45 -05:00
bunnei
29981fa2eb Merge pull request #245 from Subv/set_shader2
GPU: Store shader constbuffer bindings in the GPU state.
2018-03-17 21:19:39 -04:00
Subv
ccb8da1512 GPU: Renamed ShaderType to ShaderStage as that is less confusing. 2018-03-17 18:32:57 -05:00
Subv
88698c156f GPU: Store shader constbuffer bindings in the GPU state. 2018-03-17 18:32:57 -05:00
Subv
66dae22790 GPU: Corrected some register offsets and removed superfluous macro registers. 2018-03-17 18:32:56 -05:00
Subv
1d9d9c16e8 GPU: Make the SetShader macro call do the same as the real macro's code.
It'll now set the CB_SIZE, CB_ADDRESS and CB_BIND registers when it's called.

Presumably this SetShader function is binding the constant shader uniforms to buffer 1 (c1[]).
2018-03-17 18:32:55 -05:00
Subv
579000e747 GPU: Corrected the parameter documentation for the SetShader macro call.
Register 0xE24 is actually a macro that sets some shader parameters in the register structure.

Macros are uploaded to the GPU at startup and have their own ISA, we'll probably write an interpreter for this in the future.
2018-03-17 13:55:42 -05:00
bunnei
516ef4f19f Merge pull request #242 from Subv/set_shader
GPU: Handle the SetShader method call (0xE24) and store the shader config.
2018-03-17 00:34:17 -04:00
bunnei
c286921739 Merge pull request #243 from Subv/vertex_buffer
GPU: Added the vertex array registers.
2018-03-17 00:04:31 -04:00
Subv
f93d769a1c GPU: Handle the SetShader method call (0xE24) and store the shader config. 2018-03-16 22:51:06 -05:00
Subv
d2888f7e90 GPU: Added the vertex array registers. 2018-03-16 22:47:45 -05:00
bunnei
cd4e8a989c Merge pull request #241 from Subv/gpu_method_call
GPU: Process command mode 5 (IncreaseOnce) differently from other commands
2018-03-16 22:28:22 -04:00
Subv
29feece4b8 GPU: Process command mode 5 (IncreaseOnce) differently from other commands.
Accumulate all arguments before calling the desired method.

Note: Maybe we should do the same for the NonIncreasing mode?
2018-03-16 20:32:44 -05:00
bunnei
0eff775264 Merge pull request #239 from Subv/shaders
GPU: Added some shader-related registers.
2018-03-16 21:09:35 -04:00
bunnei
e453b09a61 Merge pull request #238 from bunnei/fix-buffer-check
nvflinger: Remove superfluous buffer format check.
2018-03-16 21:04:39 -04:00
Subv
bf310a41b8 GPU: Assert that we get a 0 CODE_ADDRESS register in the 3D engine.
Shader address calculation depends on this value to some extent, we do not currently know what it being 0 entails.
2018-03-16 19:24:41 -05:00
Subv
cbec739e7b GPU: Added Maxwell registers for Shader Program control. 2018-03-16 19:23:11 -05:00
bunnei
494275fd38 nvflinger: Remove superfluous buffer format check. 2018-03-16 20:11:50 -04:00
bunnei
e7ba2a4447 Merge pull request #232 from bunnei/heap-fixes
Various heap fixes for libtransistor
2018-03-16 20:06:27 -04:00
bunnei
cc6f22e0e4 process: MirrorMemory should use MemoryState::Mapped. 2018-03-16 19:24:54 -04:00
bunnei
e9a857ce82 process: Unmap previously allocated heap. 2018-03-16 18:32:25 -04:00
bunnei
403f8e79ea arm_interface: Support unmapping previously mapped memory. 2018-03-16 18:32:24 -04:00
bunnei
34a29ad051 svc: Use more correct values for GetInfo MapRegion and NewMapRegion. 2018-03-16 18:32:23 -04:00
bunnei
8581404482 kernel: Move stack region outside of application heap. 2018-03-16 18:32:23 -04:00
bunnei
69ee9edd8d memory: Add regions for map region, "new" map region, etc. 2018-03-16 18:32:22 -04:00
bunnei
3923b0f589 process: Fix stack memory state. 2018-03-16 18:32:21 -04:00
bunnei
8be7131033 MemoryState: Add additional memory states and improve naming. 2018-03-16 18:32:21 -04:00
bunnei
07ae1f972d Merge pull request #237 from mailwl/nifm-module
Service/NIFM: convert to module
2018-03-16 18:26:02 -04:00
21 changed files with 475 additions and 103 deletions

View File

@@ -39,8 +39,12 @@ public:
Run(1);
}
/// Maps a backing memory region for the CPU
virtual void MapBackingMemory(VAddr address, size_t size, u8* memory,
Kernel::VMAPermission perms) {}
Kernel::VMAPermission perms) = 0;
/// Unmaps a region of memory that was previously mapped using MapBackingMemory
virtual void UnmapMemory(VAddr address, size_t size) = 0;
/// Clear all instruction cache
virtual void ClearInstructionCache() = 0;

View File

@@ -136,6 +136,10 @@ void ARM_Dynarmic::MapBackingMemory(u64 address, size_t size, u8* memory,
inner_unicorn.MapBackingMemory(address, size, memory, perms);
}
void ARM_Dynarmic::UnmapMemory(u64 address, size_t size) {
inner_unicorn.UnmapMemory(address, size);
}
void ARM_Dynarmic::SetPC(u64 pc) {
jit->SetPC(pc);
}

View File

@@ -19,7 +19,7 @@ public:
void MapBackingMemory(VAddr address, size_t size, u8* memory,
Kernel::VMAPermission perms) override;
void UnmapMemory(u64 address, size_t size) override;
void SetPC(u64 pc) override;
u64 GetPC() const override;
u64 GetReg(int index) const override;

View File

@@ -77,6 +77,10 @@ void ARM_Unicorn::MapBackingMemory(VAddr address, size_t size, u8* memory,
CHECKED(uc_mem_map_ptr(uc, address, size, static_cast<u32>(perms), memory));
}
void ARM_Unicorn::UnmapMemory(VAddr address, size_t size) {
CHECKED(uc_mem_unmap(uc, address, size));
}
void ARM_Unicorn::SetPC(u64 pc) {
CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &pc));
}

View File

@@ -14,6 +14,7 @@ public:
~ARM_Unicorn();
void MapBackingMemory(VAddr address, size_t size, u8* memory,
Kernel::VMAPermission perms) override;
void UnmapMemory(VAddr address, size_t size) override;
void SetPC(u64 pc) override;
u64 GetPC() const override;
u64 GetReg(int index) const override;

View File

@@ -33,10 +33,6 @@ enum class HandleType : u32 {
ServerSession,
};
enum {
DEFAULT_STACK_SIZE = 0x10000,
};
enum class ResetType {
OneShot,
Sticky,

View File

@@ -117,11 +117,12 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) {
}
void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
// Allocate and map stack
// Allocate and map the main thread stack
// TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
// of the user address space.
vm_manager
.MapMemoryBlock(Memory::HEAP_VADDR_END - stack_size,
std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size,
MemoryState::Heap)
.MapMemoryBlock(Memory::STACK_VADDR, std::make_shared<std::vector<u8>>(stack_size, 0), 0,
stack_size, MemoryState::Mapped)
.Unwrap();
misc_memory_used += stack_size;
memory_region->used += stack_size;
@@ -153,9 +154,9 @@ void Process::LoadModule(SharedPtr<CodeSet> module_, VAddr base_addr) {
};
// Map CodeSet segments
MapSegment(module_->code, VMAPermission::ReadExecute, MemoryState::Code);
MapSegment(module_->rodata, VMAPermission::Read, MemoryState::Static);
MapSegment(module_->data, VMAPermission::ReadWrite, MemoryState::Static);
MapSegment(module_->code, VMAPermission::ReadExecute, MemoryState::CodeStatic);
MapSegment(module_->rodata, VMAPermission::Read, MemoryState::CodeMutable);
MapSegment(module_->data, VMAPermission::ReadWrite, MemoryState::CodeMutable);
}
VAddr Process::GetLinearHeapAreaAddress() const {
@@ -182,6 +183,8 @@ ResultVal<VAddr> Process::HeapAllocate(VAddr target, u64 size, VMAPermission per
// Initialize heap
heap_memory = std::make_shared<std::vector<u8>>();
heap_start = heap_end = target;
} else {
vm_manager.UnmapRange(heap_start, heap_end - heap_start);
}
// If necessary, expand backing vector to cover new heap extents.
@@ -201,7 +204,7 @@ ResultVal<VAddr> Process::HeapAllocate(VAddr target, u64 size, VMAPermission per
size, MemoryState::Heap));
vm_manager.Reprotect(vma, perms);
heap_used += size;
heap_used = size;
memory_region->used += size;
return MakeResult<VAddr>(heap_end - size);
@@ -288,7 +291,7 @@ ResultCode Process::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
CASCADE_RESULT(auto new_vma,
vm_manager.MapMemoryBlock(dst_addr, backing_block, backing_block_offset, size,
vma->second.meminfo_state));
MemoryState::Mapped));
// Protect mirror with permissions from old region
vm_manager.Reprotect(new_vma, vma->second.permissions);
// Remove permissions from old region

View File

@@ -317,13 +317,13 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
*result = Core::CurrentProcess()->allowed_thread_priority_mask;
break;
case GetInfoType::MapRegionBaseAddr:
*result = vm_manager.GetMapRegionBaseAddr();
*result = Memory::MAP_REGION_VADDR;
break;
case GetInfoType::MapRegionSize:
*result = vm_manager.GetAddressSpaceSize();
*result = Memory::MAP_REGION_SIZE;
break;
case GetInfoType::HeapRegionBaseAddr:
*result = vm_manager.GetNewMapRegionBaseAddr() + vm_manager.GetNewMapRegionSize();
*result = Memory::HEAP_VADDR;
break;
case GetInfoType::HeapRegionSize:
*result = Memory::HEAP_SIZE;
@@ -347,10 +347,10 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
*result = vm_manager.GetAddressSpaceSize();
break;
case GetInfoType::NewMapRegionBaseAddr:
*result = vm_manager.GetNewMapRegionBaseAddr();
*result = Memory::NEW_MAP_REGION_VADDR;
break;
case GetInfoType::NewMapRegionSize:
*result = vm_manager.GetNewMapRegionSize();
*result = Memory::NEW_MAP_REGION_SIZE;
break;
case GetInfoType::IsVirtualAddressMemoryEnabled:
*result = Core::CurrentProcess()->is_virtual_address_memory_enabled;
@@ -468,7 +468,7 @@ static ResultCode QueryProcessMemory(MemoryInfo* memory_info, PageInfo* /*page_i
memory_info->base_address = 0;
memory_info->permission = static_cast<u32>(VMAPermission::None);
memory_info->size = 0;
memory_info->type = static_cast<u32>(MemoryState::Free);
memory_info->type = static_cast<u32>(MemoryState::Unmapped);
} else {
memory_info->base_address = vma->second.base;
memory_info->permission = static_cast<u32>(vma->second.permissions);

View File

@@ -314,7 +314,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
// TODO(Subv): Find the correct MemoryState for this region.
vm_manager.MapMemoryBlock(Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE,
linheap_memory, offset, Memory::PAGE_SIZE,
MemoryState::ThreadLocalStorage);
MemoryState::ThreadLocal);
}
// Mark the slot as used
@@ -357,7 +357,7 @@ SharedPtr<Thread> SetupMainThread(VAddr entry_point, u32 priority,
// Initialize new "main" thread
auto thread_res = Thread::Create("main", entry_point, priority, 0, THREADPROCESSORID_0,
Memory::HEAP_VADDR_END, owner_process);
Memory::STACK_VADDR_END, owner_process);
SharedPtr<Thread> thread = std::move(thread_res).Unwrap();

View File

@@ -18,8 +18,26 @@ namespace Kernel {
static const char* GetMemoryStateName(MemoryState state) {
static const char* names[] = {
"Free", "Reserved", "IO", "Static", "Code", "Private",
"Shared", "Continuous", "Aliased", "Alias", "AliasCode", "Locked",
"Unmapped",
"Io",
"Normal",
"CodeStatic",
"CodeMutable",
"Heap",
"Shared",
"Unknown1"
"ModuleCodeStatic",
"ModuleCodeMutable",
"IpcBuffer0",
"Mapped",
"ThreadLocal",
"TransferMemoryIsolated",
"TransferMemory",
"ProcessMemory",
"Unknown2"
"IpcBuffer1",
"IpcBuffer3",
"KernelStack",
};
return names[(int)state];
@@ -142,7 +160,7 @@ VMManager::VMAIter VMManager::Unmap(VMAIter vma_handle) {
VirtualMemoryArea& vma = vma_handle->second;
vma.type = VMAType::Free;
vma.permissions = VMAPermission::None;
vma.meminfo_state = MemoryState::Free;
vma.meminfo_state = MemoryState::Unmapped;
vma.backing_block = nullptr;
vma.offset = 0;
@@ -166,6 +184,9 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {
}
ASSERT(FindVMA(target)->second.size >= size);
Core::CPU().UnmapMemory(target, size);
return RESULT_SUCCESS;
}
@@ -377,19 +398,4 @@ u64 VMManager::GetAddressSpaceSize() {
return MAX_ADDRESS;
}
VAddr VMManager::GetMapRegionBaseAddr() {
LOG_WARNING(Kernel, "(STUBBED) called");
return Memory::HEAP_VADDR;
}
VAddr VMManager::GetNewMapRegionBaseAddr() {
LOG_WARNING(Kernel, "(STUBBED) called");
return 0x8000000;
}
u64 VMManager::GetNewMapRegionSize() {
LOG_WARNING(Kernel, "(STUBBED) called");
return 0x8000000;
}
} // namespace Kernel

View File

@@ -41,15 +41,24 @@ enum class VMAPermission : u8 {
/// Set of values returned in MemoryInfo.state by svcQueryMemory.
enum class MemoryState : u32 {
Free = 0,
IO = 1,
Normal = 2,
Code = 3,
Static = 4,
Heap = 5,
Shared = 6,
Mapped = 6,
ThreadLocalStorage = 12,
Unmapped = 0x0,
Io = 0x1,
Normal = 0x2,
CodeStatic = 0x3,
CodeMutable = 0x4,
Heap = 0x5,
Shared = 0x6,
ModuleCodeStatic = 0x8,
ModuleCodeMutable = 0x9,
IpcBuffer0 = 0xA,
Mapped = 0xB,
ThreadLocal = 0xC,
TransferMemoryIsolated = 0xD,
TransferMemory = 0xE,
ProcessMemory = 0xF,
IpcBuffer1 = 0x11,
IpcBuffer3 = 0x12,
KernelStack = 0x13,
};
/**
@@ -66,7 +75,7 @@ struct VirtualMemoryArea {
VMAType type = VMAType::Free;
VMAPermission permissions = VMAPermission::None;
/// Tag returned by svcQueryMemory. Not otherwise used.
MemoryState meminfo_state = MemoryState::Free;
MemoryState meminfo_state = MemoryState::Unmapped;
// Settings for type = AllocatedMemoryBlock
/// Memory block backing this VMA.
@@ -192,15 +201,6 @@ public:
/// Gets the total address space address size, used by svcGetInfo
u64 GetAddressSpaceSize();
/// Gets the map region base address, used by svcGetInfo
VAddr GetMapRegionBaseAddr();
/// Gets the base address for a new memory region, used by svcGetInfo
VAddr GetNewMapRegionBaseAddr();
/// Gets the size for a new memory region, used by svcGetInfo
u64 GetNewMapRegionSize();
/// Each VMManager has its own page table, which is set as the main one when the owning process
/// is scheduled.
Memory::PageTable page_table;

View File

@@ -36,9 +36,7 @@ u32 BufferQueue::DequeueBuffer(u32 pixel_format, u32 width, u32 height) {
return false;
// Make sure that the parameters match.
auto& igbp_buffer = buffer.igbp_buffer;
return igbp_buffer.format == pixel_format && igbp_buffer.width == width &&
igbp_buffer.height == height;
return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height;
});
if (itr == queue.end()) {
LOG_CRITICAL(Service_NVDRV, "no free buffers for pixel_format=%d, width=%d, height=%d",

View File

@@ -414,7 +414,7 @@ ResultStatus AppLoader_ELF::Load(Kernel::SharedPtr<Kernel::Process>& process) {
process->resource_limit =
Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION);
process->Run(codeset->entrypoint, 48, Kernel::DEFAULT_STACK_SIZE);
process->Run(codeset->entrypoint, 48, Memory::STACK_SIZE);
is_loaded = true;
return ResultStatus::Success;

View File

@@ -137,7 +137,7 @@ ResultStatus AppLoader_NRO::Load(Kernel::SharedPtr<Kernel::Process>& process) {
process->address_mappings = default_address_mappings;
process->resource_limit =
Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION);
process->Run(base_addr, 48, Kernel::DEFAULT_STACK_SIZE);
process->Run(base_addr, 48, Memory::STACK_SIZE);
is_loaded = true;
return ResultStatus::Success;

View File

@@ -165,7 +165,7 @@ ResultStatus AppLoader_NSO::Load(Kernel::SharedPtr<Kernel::Process>& process) {
process->address_mappings = default_address_mappings;
process->resource_limit =
Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION);
process->Run(Memory::PROCESS_IMAGE_VADDR, 48, Kernel::DEFAULT_STACK_SIZE);
process->Run(Memory::PROCESS_IMAGE_VADDR, 48, Memory::STACK_SIZE);
is_loaded = true;
return ResultStatus::Success;

View File

@@ -129,21 +129,6 @@ enum : VAddr {
PROCESS_IMAGE_MAX_SIZE = 0x08000000,
PROCESS_IMAGE_VADDR_END = PROCESS_IMAGE_VADDR + PROCESS_IMAGE_MAX_SIZE,
/// Area where IPC buffers are mapped onto.
IPC_MAPPING_VADDR = 0x04000000,
IPC_MAPPING_SIZE = 0x04000000,
IPC_MAPPING_VADDR_END = IPC_MAPPING_VADDR + IPC_MAPPING_SIZE,
/// Application heap (includes stack).
HEAP_VADDR = 0x108000000,
HEAP_SIZE = 0xF0000000,
HEAP_VADDR_END = HEAP_VADDR + HEAP_SIZE,
/// Area where shared memory buffers are mapped onto.
SHARED_MEMORY_VADDR = 0x10000000,
SHARED_MEMORY_SIZE = 0x04000000,
SHARED_MEMORY_VADDR_END = SHARED_MEMORY_VADDR + SHARED_MEMORY_SIZE,
/// Maps 1:1 to an offset in FCRAM. Used for HW allocations that need to be linear in physical
/// memory.
LINEAR_HEAP_VADDR = 0x14000000,
@@ -176,14 +161,39 @@ enum : VAddr {
SHARED_PAGE_SIZE = 0x00001000,
SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
/// Area where TLS (Thread-Local Storage) buffers are allocated.
TLS_AREA_VADDR = 0x228000000,
TLS_ENTRY_SIZE = 0x200,
/// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS.
NEW_LINEAR_HEAP_VADDR = 0x30000000,
NEW_LINEAR_HEAP_SIZE = 0x10000000,
NEW_LINEAR_HEAP_VADDR_END = NEW_LINEAR_HEAP_VADDR + NEW_LINEAR_HEAP_SIZE,
/// Area where TLS (Thread-Local Storage) buffers are allocated.
TLS_AREA_VADDR = NEW_LINEAR_HEAP_VADDR_END,
TLS_ENTRY_SIZE = 0x200,
TLS_AREA_SIZE = 0x10000000,
TLS_ADREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE,
/// Application stack
STACK_VADDR = TLS_ADREA_VADDR_END,
STACK_SIZE = 0x10000,
STACK_VADDR_END = STACK_VADDR + STACK_SIZE,
/// Application heap
/// Size is confirmed to be a static value on fw 3.0.0
HEAP_VADDR = 0x108000000,
HEAP_SIZE = 0x180000000,
HEAP_VADDR_END = HEAP_VADDR + HEAP_SIZE,
/// New map region
/// Size is confirmed to be a static value on fw 3.0.0
NEW_MAP_REGION_VADDR = HEAP_VADDR_END,
NEW_MAP_REGION_SIZE = 0x80000000,
NEW_MAP_REGION_VADDR_END = NEW_MAP_REGION_VADDR + NEW_MAP_REGION_SIZE,
/// Map region
/// Size is confirmed to be a static value on fw 3.0.0
MAP_REGION_VADDR = NEW_MAP_REGION_VADDR_END,
MAP_REGION_SIZE = 0x1000000000,
MAP_REGION_VADDR_END = MAP_REGION_VADDR + MAP_REGION_SIZE,
};
/// Currently active page table

View File

@@ -24,12 +24,37 @@ namespace Tegra {
enum class BufferMethods {
BindObject = 0,
SetGraphMacroCode = 0x45,
SetGraphMacroCodeArg = 0x46,
SetGraphMacroEntry = 0x47,
CountBufferMethods = 0x100,
};
void GPU::WriteReg(u32 method, u32 subchannel, u32 value) {
LOG_WARNING(HW_GPU, "Processing method %08X on subchannel %u value %08X", method, subchannel,
value);
void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) {
LOG_WARNING(HW_GPU, "Processing method %08X on subchannel %u value %08X remaining params %u",
method, subchannel, value, remaining_params);
if (method == static_cast<u32>(BufferMethods::SetGraphMacroEntry)) {
// Prepare to upload a new macro, reset the upload counter.
LOG_DEBUG(HW_GPU, "Uploading GPU macro %08X", value);
current_macro_entry = value;
current_macro_code.clear();
return;
}
if (method == static_cast<u32>(BufferMethods::SetGraphMacroCodeArg)) {
// Append a new code word to the current macro.
current_macro_code.push_back(value);
// There are no more params remaining, submit the code to the 3D engine.
if (remaining_params == 0) {
maxwell_3d->SubmitMacroCode(current_macro_entry, std::move(current_macro_code));
current_macro_entry = InvalidGraphMacroEntry;
current_macro_code.clear();
}
return;
}
if (method == static_cast<u32>(BufferMethods::BindObject)) {
// Bind the current subchannel to the desired engine id.
@@ -54,7 +79,7 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value) {
fermi_2d->WriteReg(method, value);
break;
case EngineID::MAXWELL_B:
maxwell_3d->WriteReg(method, value);
maxwell_3d->WriteReg(method, value, remaining_params);
break;
case EngineID::MAXWELL_COMPUTE_B:
maxwell_compute->WriteReg(method, value);
@@ -78,7 +103,8 @@ void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
case SubmissionMode::Increasing: {
// Increase the method value with each argument.
for (unsigned i = 0; i < header.arg_count; ++i) {
WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr));
WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
header.arg_count - i - 1);
current_addr += sizeof(u32);
}
break;
@@ -87,27 +113,31 @@ void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
case SubmissionMode::NonIncreasing: {
// Use the same method value for all arguments.
for (unsigned i = 0; i < header.arg_count; ++i) {
WriteReg(header.method, header.subchannel, Memory::Read32(current_addr));
WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
header.arg_count - i - 1);
current_addr += sizeof(u32);
}
break;
}
case SubmissionMode::IncreaseOnce: {
ASSERT(header.arg_count.Value() >= 1);
// Use the original method for the first argument and then the next method for all other
// arguments.
WriteReg(header.method, header.subchannel, Memory::Read32(current_addr));
WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
header.arg_count - 1);
current_addr += sizeof(u32);
// Use the same method value for all arguments.
for (unsigned i = 1; i < header.arg_count; ++i) {
WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr));
WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
header.arg_count - i - 1);
current_addr += sizeof(u32);
}
break;
}
case SubmissionMode::Inline: {
// The register value is stored in the bits 16-28 as an immediate
WriteReg(header.method, header.subchannel, header.inline_data);
WriteReg(header.method, header.subchannel, header.inline_data, 0);
break;
}
default:

View File

@@ -34,6 +34,4 @@ static_assert(std::is_standard_layout<CommandHeader>::value == true,
"CommandHeader does not use standard layout");
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
void ProcessCommandList(VAddr address, u32 size);
} // namespace Tegra

View File

@@ -8,17 +8,101 @@
namespace Tegra {
namespace Engines {
/// First register id that is actually a Macro call.
constexpr u32 MacroRegistersStart = 0xE00;
const std::unordered_map<u32, Maxwell3D::MethodInfo> Maxwell3D::method_handlers = {
{0xE24, {"SetShader", 5, &Maxwell3D::SetShader}},
};
Maxwell3D::Maxwell3D(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
void Maxwell3D::WriteReg(u32 method, u32 value) {
void Maxwell3D::SubmitMacroCode(u32 entry, std::vector<u32> code) {
uploaded_macros[entry * 2 + MacroRegistersStart] = std::move(code);
}
void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
// TODO(Subv): Write an interpreter for the macros uploaded via registers 0x45 and 0x47
// The requested macro must have been uploaded already.
ASSERT_MSG(uploaded_macros.find(method) != uploaded_macros.end(), "Macro %08X was not uploaded",
method);
auto itr = method_handlers.find(method);
ASSERT_MSG(itr != method_handlers.end(), "Unhandled method call %08X", method);
ASSERT(itr->second.arguments == parameters.size());
(this->*itr->second.handler)(parameters);
// Reset the current macro and its parameters.
executing_macro = 0;
macro_params.clear();
}
void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
// It is an error to write to a register other than the current macro's ARG register before it
// has finished execution.
if (executing_macro != 0) {
ASSERT(method == executing_macro + 1);
}
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization.
if (method >= MacroRegistersStart) {
// We're trying to execute a macro
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
ASSERT_MSG((method % 2) == 0,
"Can't start macro execution by writing to the ARGS register");
executing_macro = method;
}
macro_params.push_back(value);
// Call the macro when there are no more parameters in the command buffer
if (remaining_params == 0) {
CallMacroMethod(executing_macro, macro_params);
}
return;
}
regs.reg_array[method] = value;
#define MAXWELL3D_REG_INDEX(field_name) (offsetof(Regs, field_name) / sizeof(u32))
switch (method) {
case MAXWELL3D_REG_INDEX(code_address.code_address_high):
case MAXWELL3D_REG_INDEX(code_address.code_address_low): {
// Note: For some reason games (like Puyo Puyo Tetris) seem to write 0 to the CODE_ADDRESS
// register, we do not currently know if that's intended or a bug, so we assert it lest
// stuff breaks in other places (like the shader address calculation).
ASSERT_MSG(regs.code_address.CodeAddress() == 0, "Unexpected CODE_ADDRESS register value.");
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
ProcessCBBind(Regs::ShaderStage::Vertex);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[1].raw_config): {
ProcessCBBind(Regs::ShaderStage::TesselationControl);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[2].raw_config): {
ProcessCBBind(Regs::ShaderStage::TesselationEval);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[3].raw_config): {
ProcessCBBind(Regs::ShaderStage::Geometry);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[4].raw_config): {
ProcessCBBind(Regs::ShaderStage::Fragment);
break;
}
case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): {
DrawArrays();
break;
@@ -56,5 +140,59 @@ void Maxwell3D::DrawArrays() {
LOG_WARNING(HW_GPU, "Game requested a DrawArrays, ignoring");
}
void Maxwell3D::SetShader(const std::vector<u32>& parameters) {
/**
* Parameters description:
* [0] = Shader Program.
* [1] = Unknown, presumably the shader id.
* [2] = Offset to the start of the shader, after the 0x30 bytes header.
* [3] = Shader Stage.
* [4] = Const Buffer Address >> 8.
*/
auto shader_program = static_cast<Regs::ShaderProgram>(parameters[0]);
// TODO(Subv): This address is probably an offset from the CODE_ADDRESS register.
GPUVAddr address = parameters[2];
auto shader_stage = static_cast<Regs::ShaderStage>(parameters[3]);
GPUVAddr cb_address = parameters[4] << 8;
auto& shader = state.shader_programs[static_cast<size_t>(shader_program)];
shader.program = shader_program;
shader.stage = shader_stage;
shader.address = address;
// Perform the same operations as the real macro code.
// TODO(Subv): Early exit if register 0xD1C + shader_program contains the same as params[1].
auto& shader_regs = regs.shader_config[static_cast<size_t>(shader_program)];
shader_regs.start_id = address;
// TODO(Subv): Write params[1] to register 0xD1C + shader_program.
// TODO(Subv): Write params[2] to register 0xD22 + shader_program.
// Note: This value is hardcoded in the macro's code.
static constexpr u32 DefaultCBSize = 0x10000;
regs.const_buffer.cb_size = DefaultCBSize;
regs.const_buffer.cb_address_high = cb_address >> 32;
regs.const_buffer.cb_address_low = cb_address & 0xFFFFFFFF;
// Write a hardcoded 0x11 to CB_BIND, this binds the current const buffer to buffer c1[] in the
// shader. It's likely that these are the constants for the shader.
regs.cb_bind[static_cast<size_t>(shader_stage)].valid.Assign(1);
regs.cb_bind[static_cast<size_t>(shader_stage)].index.Assign(1);
ProcessCBBind(shader_stage);
}
void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
auto& shader = state.shader_stages[static_cast<size_t>(stage)];
auto& bind_data = regs.cb_bind[static_cast<size_t>(stage)];
auto& buffer = shader.const_buffers[bind_data.index];
buffer.enabled = bind_data.valid.Value() != 0;
buffer.index = bind_data.index;
buffer.address = regs.const_buffer.BufferAddress();
buffer.size = regs.const_buffer.cb_size;
}
} // namespace Engines
} // namespace Tegra

View File

@@ -4,6 +4,9 @@
#pragma once
#include <array>
#include <unordered_map>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -18,21 +21,58 @@ public:
~Maxwell3D() = default;
/// Write the value to the register identified by method.
void WriteReg(u32 method, u32 value);
void WriteReg(u32 method, u32 value, u32 remaining_params);
/// Uploads the code for a GPU macro program associated with the specified entry.
void SubmitMacroCode(u32 entry, std::vector<u32> code);
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
struct Regs {
static constexpr size_t NUM_REGS = 0xE36;
static constexpr size_t NumCBData = 16;
static constexpr size_t NumVertexArrays = 32;
static constexpr size_t MaxShaderProgram = 6;
static constexpr size_t MaxShaderStage = 5;
// Maximum number of const buffers per shader stage.
static constexpr size_t MaxConstBuffers = 16;
enum class QueryMode : u32 {
Write = 0,
Sync = 1,
};
enum class ShaderProgram : u32 {
VertexA = 0,
VertexB = 1,
TesselationControl = 2,
TesselationEval = 3,
Geometry = 4,
Fragment = 5,
};
enum class ShaderStage : u32 {
Vertex = 0,
TesselationControl = 1,
TesselationEval = 2,
Geometry = 3,
Fragment = 4,
};
union {
struct {
INSERT_PADDING_WORDS(0x585);
INSERT_PADDING_WORDS(0x582);
struct {
u32 code_address_high;
u32 code_address_low;
GPUVAddr CodeAddress() const {
return static_cast<GPUVAddr>(
(static_cast<GPUVAddr>(code_address_high) << 32) | code_address_low);
}
} code_address;
INSERT_PADDING_WORDS(1);
struct {
u32 vertex_end_gl;
u32 vertex_begin_gl;
@@ -54,7 +94,79 @@ public:
(static_cast<GPUVAddr>(query_address_high) << 32) | query_address_low);
}
} query;
INSERT_PADDING_WORDS(0x772);
INSERT_PADDING_WORDS(0x3C);
struct {
union {
BitField<0, 12, u32> stride;
BitField<12, 1, u32> enable;
};
u32 start_high;
u32 start_low;
u32 divisor;
GPUVAddr StartAddress() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) |
start_low);
}
} vertex_array[NumVertexArrays];
INSERT_PADDING_WORDS(0x40);
struct {
u32 limit_high;
u32 limit_low;
GPUVAddr LimitAddress() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
limit_low);
}
} vertex_array_limit[NumVertexArrays];
struct {
union {
BitField<0, 1, u32> enable;
BitField<4, 4, ShaderProgram> program;
};
u32 start_id;
INSERT_PADDING_WORDS(1);
u32 gpr_alloc;
ShaderStage type;
INSERT_PADDING_WORDS(9);
} shader_config[MaxShaderProgram];
INSERT_PADDING_WORDS(0x8C);
struct {
u32 cb_size;
u32 cb_address_high;
u32 cb_address_low;
u32 cb_pos;
u32 cb_data[NumCBData];
GPUVAddr BufferAddress() const {
return static_cast<GPUVAddr>(
(static_cast<GPUVAddr>(cb_address_high) << 32) | cb_address_low);
}
} const_buffer;
INSERT_PADDING_WORDS(0x10);
struct {
union {
u32 raw_config;
BitField<0, 1, u32> valid;
BitField<4, 5, u32> index;
};
INSERT_PADDING_WORDS(7);
} cb_bind[MaxShaderStage];
INSERT_PADDING_WORDS(0x56);
u32 tex_cb_index;
INSERT_PADDING_WORDS(0x4B3);
};
std::array<u32, NUM_REGS> reg_array;
};
@@ -62,21 +174,81 @@ public:
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
struct State {
struct ConstBufferInfo {
GPUVAddr address;
u32 index;
u32 size;
bool enabled;
};
struct ShaderProgramInfo {
Regs::ShaderStage stage;
Regs::ShaderProgram program;
GPUVAddr address;
};
struct ShaderStageInfo {
std::array<ConstBufferInfo, Regs::MaxConstBuffers> const_buffers;
};
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
std::array<ShaderProgramInfo, Regs::MaxShaderProgram> shader_programs;
};
State state{};
private:
MemoryManager& memory_manager;
std::unordered_map<u32, std::vector<u32>> uploaded_macros;
/// Macro method that is currently being executed / being fed parameters.
u32 executing_macro = 0;
/// Parameters that have been submitted to the macro call so far.
std::vector<u32> macro_params;
/**
* Call a macro on this engine.
* @param method Method to call
* @param parameters Arguments to the method call
*/
void CallMacroMethod(u32 method, const std::vector<u32>& parameters);
/// Handles a write to the QUERY_GET register.
void ProcessQueryGet();
/// Handles a write to the CB_BIND register.
void ProcessCBBind(Regs::ShaderStage stage);
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
void DrawArrays();
MemoryManager& memory_manager;
/// Method call handlers
void SetShader(const std::vector<u32>& parameters);
struct MethodInfo {
const char* name;
u32 arguments;
void (Maxwell3D::*handler)(const std::vector<u32>& parameters);
};
static const std::unordered_map<u32, MethodInfo> method_handlers;
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
ASSERT_REG_POSITION(shader_config[0], 0x800);
ASSERT_REG_POSITION(const_buffer, 0x8E0);
ASSERT_REG_POSITION(cb_bind[0], 0x904);
ASSERT_REG_POSITION(tex_cb_index, 0x982);
#undef ASSERT_REG_POSITION

View File

@@ -6,6 +6,7 @@
#include <memory>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
@@ -38,8 +39,10 @@ public:
std::unique_ptr<MemoryManager> memory_manager;
private:
static constexpr u32 InvalidGraphMacroEntry = 0xFFFFFFFF;
/// Writes a single register in the engine bound to the specified subchannel
void WriteReg(u32 method, u32 subchannel, u32 value);
void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params);
/// Mapping of command subchannels to their bound engine ids.
std::unordered_map<u32, EngineID> bound_engines;
@@ -50,6 +53,11 @@ private:
std::unique_ptr<Engines::Fermi2D> fermi_2d;
/// Compute engine
std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
/// Entry of the macro that is currently being uploaded
u32 current_macro_entry = InvalidGraphMacroEntry;
/// Code being uploaded for the current macro
std::vector<u32> current_macro_code;
};
} // namespace Tegra