Compare commits
82 commits: mainline-1...mainline-1

| SHA1 |
|---|
| 9be9600bdc |
| 12514ccd35 |
| f601f25bcc |
| 27e10e0442 |
| 6738fb5fef |
| 0a67416971 |
| 369be67039 |
| aa599ac709 |
| a2edb27158 |
| f470bcb826 |
| 7a35178ee2 |
| 45c162444d |
| 6c4985edc9 |
| 5d369112d9 |
| 63bda67a34 |
| d4b95bfc25 |
| 5e457bf258 |
| 4be61013a1 |
| 5ad889f6fd |
| 7826f0afd9 |
| 8cdbfe69b1 |
| 0ff4a5fa39 |
| fec32fed18 |
| a081dea8ab |
| 0d3db58657 |
| f2e7b29c14 |
| e42bcf2314 |
| 223a535f3f |
| c3218c110f |
| bebbdc2067 |
| 60926ac16b |
| 44d87ff641 |
| b56e7f870a |
| e2d7dda166 |
| 2a4044a858 |
| 6b0d017675 |
| 56bca83bde |
| bbecd13697 |
| 725ba6cf63 |
| 1bdb59fc6e |
| b77a1ed67a |
| afa8096df5 |
| 3477b92289 |
| 2ac7472d3f |
| 0f54b541f4 |
| 5818959e54 |
| 913b7a6872 |
| a9943222f2 |
| 5c1e1a148e |
| 5d31bab69a |
| 4882c058fd |
| 093e5440e2 |
| d4fc560c05 |
| 0eb0c24269 |
| aca40de224 |
| a1845d1dd3 |
| 697206092e |
| c9d886c84e |
| ca6f08e3b1 |
| ce64a9fab9 |
| b901cd584e |
| 1689784c19 |
| 13a8fde3ad |
| 56c7912159 |
| eb6f55d880 |
| 79a23ca5f0 |
| 83050c9495 |
| f7691ebe57 |
| 7ecf64257a |
| 9cdc576f60 |
| 1fa21fa192 |
| 32c0212b24 |
| 2bcae41a73 |
| 02ab844934 |
| d14fbfb9b5 |
| 345f852bdb |
| 8155b12d3d |
| f8ba72d491 |
| b54fb8fc4c |
| a6d2f52fc3 |
| 2b9d4088ec |
| 2e39c20da5 |

@@ -14,7 +14,7 @@ steps:
    cacheHitVar: CACHE_RESTORED
- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/exec.sh && ./.ci/scripts/$(ScriptFolder)/exec.sh
  displayName: 'Build'
- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/upload.sh && ./.ci/scripts/$(ScriptFolder)/upload.sh
- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/upload.sh && RELEASE_NAME=$(BuildName) ./.ci/scripts/$(ScriptFolder)/upload.sh
  displayName: 'Package Artifacts'
- publish: artifacts
  artifact: 'yuzu-$(BuildName)-$(BuildSuffix)'

@@ -3,7 +3,7 @@ jobs:
  displayName: 'standard'
  pool:
    vmImage: ubuntu-latest
  strategy:
  strategy:
    maxParallel: 10
    matrix:
      windows:

@@ -3,19 +3,21 @@ jobs:
  displayName: 'testing'
  pool:
    vmImage: ubuntu-latest
  strategy:
    maxParallel: 10
  strategy:
    maxParallel: 5
    matrix:
      windows:
        BuildSuffix: 'windows-testing'
        ScriptFolder: 'windows'
  steps:
  - script: pip install requests urllib3
    displayName: 'Prepare Environment'
  - task: PythonScript@0
    condition: eq(variables['Build.Reason'], 'PullRequest')
    displayName: 'Determine Testing Status'
    inputs:
      scriptSource: 'filePath'
      scriptPath: '../scripts/merge/check-label-presence.py'
      scriptPath: '.ci/scripts/merge/check-label-presence.py'
      arguments: '$(System.PullRequest.PullRequestNumber) create-testing-build'
  - ${{ if eq(variables.enabletesting, 'true') }}:
    - template: ./sync-source.yml

@@ -27,4 +29,4 @@ jobs:
      matchLabel: 'testing-merge'
  - template: ./build-single.yml
    parameters:
      artifactSource: 'false'
      artifactSource: 'false'

@@ -1,29 +0,0 @@
steps:
- task: DownloadPipelineArtifact@2
  displayName: 'Download Windows Release'
  inputs:
    artifactName: 'yuzu-$(BuildName)-windows-mingw'
    buildType: 'current'
    targetPath: '$(Build.ArtifactStagingDirectory)'
- task: DownloadPipelineArtifact@2
  displayName: 'Download Linux Release'
  inputs:
    artifactName: 'yuzu-$(BuildName)-linux'
    buildType: 'current'
    targetPath: '$(Build.ArtifactStagingDirectory)'
- task: DownloadPipelineArtifact@2
  displayName: 'Download Release Point'
  inputs:
    artifactName: 'yuzu-$(BuildName)-release-point'
    buildType: 'current'
    targetPath: '$(Build.ArtifactStagingDirectory)'
- script: echo '##vso[task.setvariable variable=tagcommit]' && cat $(Build.ArtifactStagingDirectory)/tag-commit.sha
  displayName: 'Calculate Release Point'
- task: GitHubRelease@0
  inputs:
    gitHubConnection: $(GitHubReleaseConnectionName)
    repositoryName: '$(GitHubReleaseRepoName)'
    action: 'create'
    target: $(variables.tagcommit)
    title: 'yuzu $(BuildName) #$(Build.BuildId)'
    assets: '$(Build.ArtifactStagingDirectory)/*'

@@ -2,6 +2,7 @@ yuzu emulator
=============
[Travis CI](https://travis-ci.org/yuzu-emu/yuzu)
[AppVeyor CI](https://ci.appveyor.com/project/bunnei/yuzu)
[Azure Pipelines](https://dev.azure.com/yuzu-emu/yuzu/)

yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/).

@@ -50,11 +50,14 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_

static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
                               void* user_data) {
    auto* const system = static_cast<System*>(user_data);

    ARM_Interface::ThreadContext ctx{};
    Core::CurrentArmInterface().SaveContext(ctx);
    system->CurrentArmInterface().SaveContext(ctx);
    ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
               ctx.pc, ctx.cpu_registers[30]);
    return {};

    return false;
}

ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {

@@ -65,7 +68,7 @@ ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {

    uc_hook hook{};
    CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1));
    CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1));
    CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, &system, 0, -1));
    if (GDBStub::IsServerEnabled()) {
        CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1));
        last_bkpt_hit = false;

@@ -327,10 +327,6 @@ private:
    static System s_instance;
};

inline ARM_Interface& CurrentArmInterface() {
    return System::GetInstance().CurrentArmInterface();
}

inline Kernel::Process* CurrentProcess() {
    return System::GetInstance().CurrentProcess();
}

@@ -94,6 +94,10 @@ u64 ProgramMetadata::GetFilesystemPermissions() const {
    return aci_file_access.permissions;
}

u32 ProgramMetadata::GetSystemResourceSize() const {
    return npdm_header.system_resource_size;
}

const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const {
    return aci_kernel_capabilities;
}

@@ -58,6 +58,7 @@ public:
    u32 GetMainThreadStackSize() const;
    u64 GetTitleID() const;
    u64 GetFilesystemPermissions() const;
    u32 GetSystemResourceSize() const;
    const KernelCapabilityDescriptors& GetKernelCapabilities() const;

    void Print() const;

@@ -76,7 +77,8 @@ private:
    u8 reserved_3;
    u8 main_thread_priority;
    u8 main_thread_cpu;
    std::array<u8, 8> reserved_4;
    std::array<u8, 4> reserved_4;
    u32_le system_resource_size;
    u32_le process_category;
    u32_le main_stack_size;
    std::array<u8, 0x10> application_name;

@@ -129,20 +129,17 @@ u64 Process::GetTotalPhysicalMemoryAvailable() const {
    return vm_manager.GetTotalPhysicalMemoryAvailable();
}

u64 Process::GetTotalPhysicalMemoryAvailableWithoutMmHeap() const {
    // TODO: Subtract the personal heap size from this when the
    // personal heap is implemented.
    return GetTotalPhysicalMemoryAvailable();
u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
    return GetTotalPhysicalMemoryAvailable() - GetSystemResourceSize();
}

u64 Process::GetTotalPhysicalMemoryUsed() const {
    return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size;
    return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size +
           GetSystemResourceUsage();
}

u64 Process::GetTotalPhysicalMemoryUsedWithoutMmHeap() const {
    // TODO: Subtract the personal heap size from this when the
    // personal heap is implemented.
    return GetTotalPhysicalMemoryUsed();
u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
    return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage();
}

void Process::RegisterThread(const Thread* thread) {

@@ -172,6 +169,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
    program_id = metadata.GetTitleID();
    ideal_core = metadata.GetMainThreadCore();
    is_64bit_process = metadata.Is64BitProgram();
    system_resource_size = metadata.GetSystemResourceSize();

    vm_manager.Reset(metadata.GetAddressSpaceType());

@@ -186,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
}

void Process::Run(s32 main_thread_priority, u64 stack_size) {
    // The kernel always ensures that the given stack size is page aligned.
    main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);

    // Allocate and map the main thread stack
    // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
    // of the user address space.
    const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
    vm_manager
        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
                        0, main_thread_stack_size, MemoryState::Stack)
        .Unwrap();
    AllocateMainThreadStack(stack_size);
    tls_region_address = CreateTLSRegion();

    vm_manager.LogLayout();

    ChangeStatus(ProcessStatus::Running);

    SetupMainThread(*this, kernel, main_thread_priority);

@@ -228,6 +218,9 @@ void Process::PrepareForTermination() {
    stop_threads(system.Scheduler(2).GetThreadList());
    stop_threads(system.Scheduler(3).GetThreadList());

    FreeTLSRegion(tls_region_address);
    tls_region_address = 0;

    ChangeStatus(ProcessStatus::Exited);
}

@@ -327,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) {
    WakeupAllWaitingThreads();
}

void Process::AllocateMainThreadStack(u64 stack_size) {
    // The kernel always ensures that the given stack size is page aligned.
    main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);

    // Allocate and map the main thread stack
    const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
    vm_manager
        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
                        0, main_thread_stack_size, MemoryState::Stack)
        .Unwrap();
}

} // namespace Kernel

@@ -135,6 +135,11 @@ public:
        return mutex;
    }

    /// Gets the address to the process' dedicated TLS region.
    VAddr GetTLSRegionAddress() const {
        return tls_region_address;
    }

    /// Gets the current status of the process
    ProcessStatus GetStatus() const {
        return status;

@@ -168,8 +173,24 @@ public:
        return capabilities.GetPriorityMask();
    }

    u32 IsVirtualMemoryEnabled() const {
        return is_virtual_address_memory_enabled;
    /// Gets the amount of secure memory to allocate for memory management.
    u32 GetSystemResourceSize() const {
        return system_resource_size;
    }

    /// Gets the amount of secure memory currently in use for memory management.
    u32 GetSystemResourceUsage() const {
        // On hardware, this returns the amount of system resource memory that has
        // been used by the kernel. This is problematic for Yuzu to emulate, because
        // system resource memory is used for page tables -- and yuzu doesn't really
        // have a way to calculate how much memory is required for page tables for
        // the current process at any given time.
        // TODO: Is this even worth implementing? Games may retrieve this value via
        // an SDK function that gets used + available system resource size for debug
        // or diagnostic purposes. However, it seems unlikely that a game would make
        // decisions based on how much system memory is dedicated to its page tables.
        // Is returning a value other than zero wise?
        return 0;
    }

    /// Whether this process is an AArch64 or AArch32 process.

@@ -196,15 +217,15 @@ public:
    u64 GetTotalPhysicalMemoryAvailable() const;

    /// Retrieves the total physical memory available to this process in bytes,
    /// without the size of the personal heap added to it.
    u64 GetTotalPhysicalMemoryAvailableWithoutMmHeap() const;
    /// without the size of the personal system resource heap added to it.
    u64 GetTotalPhysicalMemoryAvailableWithoutSystemResource() const;

    /// Retrieves the total physical memory used by this process in bytes.
    u64 GetTotalPhysicalMemoryUsed() const;

    /// Retrieves the total physical memory used by this process in bytes,
    /// without the size of the personal heap added to it.
    u64 GetTotalPhysicalMemoryUsedWithoutMmHeap() const;
    /// without the size of the personal system resource heap added to it.
    u64 GetTotalPhysicalMemoryUsedWithoutSystemResource() const;

    /// Gets the list of all threads created with this process as their owner.
    const std::list<const Thread*>& GetThreadList() const {

@@ -280,6 +301,9 @@ private:
    /// a process signal.
    void ChangeStatus(ProcessStatus new_status);

    /// Allocates the main thread stack for the process, given the stack size in bytes.
    void AllocateMainThreadStack(u64 stack_size);

    /// Memory manager for this process.
    Kernel::VMManager vm_manager;

@@ -298,12 +322,16 @@ private:
    /// Title ID corresponding to the process
    u64 program_id = 0;

    /// Specifies additional memory to be reserved for the process's memory management by the
    /// system. When this is non-zero, secure memory is allocated and used for page table allocation
    /// instead of using the normal global page tables/memory block management.
    u32 system_resource_size = 0;

    /// Resource limit descriptor for this process
    SharedPtr<ResourceLimit> resource_limit;

    /// The ideal CPU core for this process, threads are scheduled on this core by default.
    u8 ideal_core = 0;
    u32 is_virtual_address_memory_enabled = 0;

    /// The Thread Local Storage area is allocated as processes create threads,
    /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part

@@ -338,6 +366,9 @@ private:
    /// variable related facilities.
    Mutex mutex;

    /// Address indicating the location of the process' dedicated TLS region.
    VAddr tls_region_address = 0;

    /// Random values for svcGetInfo RandomEntropy
    std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{};

@@ -736,16 +736,16 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
        StackRegionBaseAddr = 14,
        StackRegionSize = 15,
        // 3.0.0+
        IsVirtualAddressMemoryEnabled = 16,
        PersonalMmHeapUsage = 17,
        SystemResourceSize = 16,
        SystemResourceUsage = 17,
        TitleId = 18,
        // 4.0.0+
        PrivilegedProcessId = 19,
        // 5.0.0+
        UserExceptionContextAddr = 20,
        // 6.0.0+
        TotalPhysicalMemoryAvailableWithoutMmHeap = 21,
        TotalPhysicalMemoryUsedWithoutMmHeap = 22,
        TotalPhysicalMemoryAvailableWithoutSystemResource = 21,
        TotalPhysicalMemoryUsedWithoutSystemResource = 22,
    };

    const auto info_id_type = static_cast<GetInfoType>(info_id);

@@ -763,12 +763,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
    case GetInfoType::StackRegionSize:
    case GetInfoType::TotalPhysicalMemoryAvailable:
    case GetInfoType::TotalPhysicalMemoryUsed:
    case GetInfoType::IsVirtualAddressMemoryEnabled:
    case GetInfoType::PersonalMmHeapUsage:
    case GetInfoType::SystemResourceSize:
    case GetInfoType::SystemResourceUsage:
    case GetInfoType::TitleId:
    case GetInfoType::UserExceptionContextAddr:
    case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap:
    case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: {
    case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
    case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: {
        if (info_sub_id != 0) {
            return ERR_INVALID_ENUM_VALUE;
        }

@@ -829,8 +829,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
        *result = process->GetTotalPhysicalMemoryUsed();
        return RESULT_SUCCESS;

    case GetInfoType::IsVirtualAddressMemoryEnabled:
        *result = process->IsVirtualMemoryEnabled();
    case GetInfoType::SystemResourceSize:
        *result = process->GetSystemResourceSize();
        return RESULT_SUCCESS;

    case GetInfoType::SystemResourceUsage:
        LOG_WARNING(Kernel_SVC, "(STUBBED) Attempted to query system resource usage");
        *result = process->GetSystemResourceUsage();
        return RESULT_SUCCESS;

    case GetInfoType::TitleId:

@@ -838,17 +843,15 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
        return RESULT_SUCCESS;

    case GetInfoType::UserExceptionContextAddr:
        LOG_WARNING(Kernel_SVC,
                    "(STUBBED) Attempted to query user exception context address, returned 0");
        *result = 0;
        *result = process->GetTLSRegionAddress();
        return RESULT_SUCCESS;

    case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap:
        *result = process->GetTotalPhysicalMemoryAvailable();
    case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
        *result = process->GetTotalPhysicalMemoryAvailableWithoutSystemResource();
        return RESULT_SUCCESS;

    case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap:
        *result = process->GetTotalPhysicalMemoryUsedWithoutMmHeap();
    case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource:
        *result = process->GetTotalPhysicalMemoryUsedWithoutSystemResource();
        return RESULT_SUCCESS;

    default:

@@ -953,6 +956,86 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
    }
}

/// Maps memory at a desired address
static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
    LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);

    if (!Common::Is4KBAligned(addr)) {
        LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
        return ERR_INVALID_ADDRESS;
    }

    if (!Common::Is4KBAligned(size)) {
        LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
        return ERR_INVALID_SIZE;
    }

    if (size == 0) {
        LOG_ERROR(Kernel_SVC, "Size is zero");
        return ERR_INVALID_SIZE;
    }

    if (!(addr < addr + size)) {
        LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
        return ERR_INVALID_MEMORY_RANGE;
    }

    Process* const current_process = system.Kernel().CurrentProcess();
    auto& vm_manager = current_process->VMManager();

    if (current_process->GetSystemResourceSize() == 0) {
        LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
        return ERR_INVALID_STATE;
    }

    if (!vm_manager.IsWithinMapRegion(addr, size)) {
        LOG_ERROR(Kernel_SVC, "Range not within map region");
        return ERR_INVALID_MEMORY_RANGE;
    }

    return vm_manager.MapPhysicalMemory(addr, size);
}

/// Unmaps memory previously mapped via MapPhysicalMemory
static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
    LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);

    if (!Common::Is4KBAligned(addr)) {
        LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
        return ERR_INVALID_ADDRESS;
    }

    if (!Common::Is4KBAligned(size)) {
        LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
        return ERR_INVALID_SIZE;
    }

    if (size == 0) {
        LOG_ERROR(Kernel_SVC, "Size is zero");
        return ERR_INVALID_SIZE;
    }

    if (!(addr < addr + size)) {
        LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
        return ERR_INVALID_MEMORY_RANGE;
    }

    Process* const current_process = system.Kernel().CurrentProcess();
    auto& vm_manager = current_process->VMManager();

    if (current_process->GetSystemResourceSize() == 0) {
        LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
        return ERR_INVALID_STATE;
    }

    if (!vm_manager.IsWithinMapRegion(addr, size)) {
        LOG_ERROR(Kernel_SVC, "Range not within map region");
        return ERR_INVALID_MEMORY_RANGE;
    }

    return vm_manager.UnmapPhysicalMemory(addr, size);
}

/// Sets the thread activity
static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
    LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);

@@ -1654,8 +1737,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
// Wait for an address (via Address Arbiter)
static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
                                 s64 timeout) {
    LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
                address, type, value, timeout);
    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address,
              type, value, timeout);

    // If the passed address is a kernel virtual address, return invalid memory state.
    if (Memory::IsKernelVirtualAddress(address)) {

@@ -1677,8 +1760,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
// Signals to an address (via Address Arbiter)
static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
                                  s32 num_to_wake) {
    LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
                address, type, value, num_to_wake);
    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
              address, type, value, num_to_wake);

    // If the passed address is a kernel virtual address, return invalid memory state.
    if (Memory::IsKernelVirtualAddress(address)) {

@@ -2310,8 +2393,8 @@ static const FunctionDef SVC_Table[] = {
    {0x29, SvcWrap<GetInfo>, "GetInfo"},
    {0x2A, nullptr, "FlushEntireDataCache"},
    {0x2B, nullptr, "FlushDataCache"},
    {0x2C, nullptr, "MapPhysicalMemory"},
    {0x2D, nullptr, "UnmapPhysicalMemory"},
    {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"},
    {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"},
    {0x2E, nullptr, "GetFutureThreadInfo"},
    {0x2F, nullptr, "GetLastThreadInfo"},
    {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"},

@@ -32,6 +32,11 @@ void SvcWrap(Core::System& system) {
    FuncReturn(system, func(system, Param(system, 0)).raw);
}

template <ResultCode func(Core::System&, u64, u64)>
void SvcWrap(Core::System& system) {
    FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw);
}

template <ResultCode func(Core::System&, u32)>
void SvcWrap(Core::System& system) {
    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);

@@ -11,6 +11,8 @@

#include "core/core.h"
#include "core/file_sys/program_metadata.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/memory.h"
#include "core/memory_setup.h"

@@ -48,10 +50,14 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
        type != next.type) {
        return false;
    }
    if (type == VMAType::AllocatedMemoryBlock &&
        (backing_block != next.backing_block || offset + size != next.offset)) {
    if ((attribute & MemoryAttribute::DeviceMapped) == MemoryAttribute::DeviceMapped) {
        // TODO: Can device mapped memory be merged sanely?
        // Not merging it may cause inaccuracies versus hardware when memory layout is queried.
        return false;
    }
    if (type == VMAType::AllocatedMemoryBlock) {
        return true;
    }
    if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) {
        return false;
    }

@@ -99,7 +105,7 @@ bool VMManager::IsValidHandle(VMAHandle handle) const {
ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
                                                          std::shared_ptr<std::vector<u8>> block,
                                                          std::size_t offset, u64 size,
                                                          MemoryState state) {
                                                          MemoryState state, VMAPermission perm) {
    ASSERT(block != nullptr);
    ASSERT(offset + size <= block->size());

@@ -109,7 +115,7 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
    ASSERT(final_vma.size == size);

    final_vma.type = VMAType::AllocatedMemoryBlock;
    final_vma.permissions = VMAPermission::ReadWrite;
    final_vma.permissions = perm;
    final_vma.state = state;
    final_vma.backing_block = std::move(block);
    final_vma.offset = offset;

@@ -288,6 +294,166 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
    return MakeResult<VAddr>(heap_region_base);
}

ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
    const auto end_addr = target + size;
    const auto last_addr = end_addr - 1;
    VAddr cur_addr = target;

    ResultCode result = RESULT_SUCCESS;

    // Check how much memory we've already mapped.
    const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size);
    if (mapped_size_result.Failed()) {
        return mapped_size_result.Code();
    }

    // If we've already mapped the desired amount, return early.
    const std::size_t mapped_size = *mapped_size_result;
    if (mapped_size == size) {
        return RESULT_SUCCESS;
    }

    // Check that we can map the memory we want.
    const auto res_limit = system.CurrentProcess()->GetResourceLimit();
    const u64 physmem_remaining = res_limit->GetMaxResourceValue(ResourceType::PhysicalMemory) -
                                  res_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory);
    if (physmem_remaining < (size - mapped_size)) {
        return ERR_RESOURCE_LIMIT_EXCEEDED;
    }

    // Keep track of the memory regions we unmap.
    std::vector<std::pair<u64, u64>> mapped_regions;

    // Iterate, trying to map memory.
    {
        cur_addr = target;

        auto iter = FindVMA(target);
        ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");

        while (true) {
            const auto& vma = iter->second;
            const auto vma_start = vma.base;
            const auto vma_end = vma_start + vma.size;
            const auto vma_last = vma_end - 1;

            // Map the memory block
            const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
            if (vma.state == MemoryState::Unmapped) {
                const auto map_res =
                    MapMemoryBlock(cur_addr, std::make_shared<std::vector<u8>>(map_size, 0), 0,
                                   map_size, MemoryState::Heap, VMAPermission::ReadWrite);
                result = map_res.Code();
                if (result.IsError()) {
                    break;
                }

                mapped_regions.emplace_back(cur_addr, map_size);
            }

            // Break once we hit the end of the range.
            if (last_addr <= vma_last) {
                break;
            }

            // Advance to the next block.
            cur_addr = vma_end;
            iter = FindVMA(cur_addr);
            ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");
        }
    }

    // If we failed, unmap memory.
    if (result.IsError()) {
        for (const auto [unmap_address, unmap_size] : mapped_regions) {
            ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(),
                       "MapPhysicalMemory un-map on error");
        }

        return result;
    }

    // Update amount of mapped physical memory.
    physical_memory_mapped += size - mapped_size;

    return RESULT_SUCCESS;
}

ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
    const auto end_addr = target + size;
    const auto last_addr = end_addr - 1;
    VAddr cur_addr = target;

    ResultCode result = RESULT_SUCCESS;

    // Check how much memory is currently mapped.
    const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size);
    if (mapped_size_result.Failed()) {
        return mapped_size_result.Code();
    }

    // If we've already unmapped all the memory, return early.
    const std::size_t mapped_size = *mapped_size_result;
    if (mapped_size == 0) {
        return RESULT_SUCCESS;
    }

    // Keep track of the memory regions we unmap.
    std::vector<std::pair<u64, u64>> unmapped_regions;

    // Try to unmap regions.
    {
        cur_addr = target;

        auto iter = FindVMA(target);
        ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");

        while (true) {
            const auto& vma = iter->second;
            const auto vma_start = vma.base;
            const auto vma_end = vma_start + vma.size;
            const auto vma_last = vma_end - 1;

            // Unmap the memory block
            const auto unmap_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
            if (vma.state == MemoryState::Heap) {
                result = UnmapRange(cur_addr, unmap_size);
                if (result.IsError()) {
                    break;
                }

                unmapped_regions.emplace_back(cur_addr, unmap_size);
            }

            // Break once we hit the end of the range.
            if (last_addr <= vma_last) {
                break;
            }

            // Advance to the next block.
            cur_addr = vma_end;
            iter = FindVMA(cur_addr);
            ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");
        }
    }

    // If we failed, re-map regions.
    // TODO: Preserve memory contents?
    if (result.IsError()) {
        for (const auto [map_address, map_size] : unmapped_regions) {
            const auto remap_res =
                MapMemoryBlock(map_address, std::make_shared<std::vector<u8>>(map_size, 0), 0,
                               map_size, MemoryState::Heap, VMAPermission::None);
            ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error");
        }
    }

    // Update mapped amount
    physical_memory_mapped -= mapped_size;

    return RESULT_SUCCESS;
}

ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) {
    constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped;
    const auto src_check_result = CheckRangeState(

@@ -435,7 +601,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
    // Protect mirror with permissions from old region
    Reprotect(new_vma, vma->second.permissions);
    // Remove permissions from old region
    Reprotect(vma, VMAPermission::None);
    ReprotectRange(src_addr, size, VMAPermission::None);

    return RESULT_SUCCESS;
}

@@ -568,14 +734,14 @@ VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) {
    const VMAIter next_vma = std::next(iter);
    if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
        iter->second.size += next_vma->second.size;
        MergeAdjacentVMA(iter->second, next_vma->second);
        vma_map.erase(next_vma);
    }

    if (iter != vma_map.begin()) {
        VMAIter prev_vma = std::prev(iter);
        if (prev_vma->second.CanBeMergedWith(iter->second)) {
            prev_vma->second.size += iter->second.size;
            MergeAdjacentVMA(prev_vma->second, iter->second);
            vma_map.erase(iter);
            iter = prev_vma;
        }

@@ -584,6 +750,38 @@ VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) {
    return iter;
}

void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right) {
    ASSERT(left.CanBeMergedWith(right));

    // Always merge allocated memory blocks, even when they don't share the same backing block.
    if (left.type == VMAType::AllocatedMemoryBlock &&
        (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
        // Check if we can save work.
        if (left.offset == 0 && left.size == left.backing_block->size()) {
            // Fast case: left is an entire backing block.
            left.backing_block->insert(left.backing_block->end(),
                                       right.backing_block->begin() + right.offset,
                                       right.backing_block->begin() + right.offset + right.size);
        } else {
            // Slow case: make a new memory block for left and right.
            auto new_memory = std::make_shared<std::vector<u8>>();
            new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset,
                               left.backing_block->begin() + left.offset + left.size);
            new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset,
                               right.backing_block->begin() + right.offset + right.size);
            left.backing_block = new_memory;
            left.offset = 0;
        }

        // Page table update is needed, because backing memory changed.
        left.size += right.size;
        UpdatePageTableForVMA(left);
    } else {
        // Just update the size.
        left.size += right.size;
    }
}

void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
    switch (vma.type) {
    case VMAType::Free:

@@ -758,6 +956,84 @@ VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, Memo
        std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask));
}

ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address,
                                                             std::size_t size) const {
    const VAddr end_addr = address + size;
    const VAddr last_addr = end_addr - 1;
    std::size_t mapped_size = 0;

    VAddr cur_addr = address;
    auto iter = FindVMA(cur_addr);
    ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end");

    while (true) {
        const auto& vma = iter->second;
        const VAddr vma_start = vma.base;
        const VAddr vma_end = vma_start + vma.size;
        const VAddr vma_last = vma_end - 1;

        // Add size if relevant.
        if (vma.state != MemoryState::Unmapped) {
            mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr);
        }

        // Break once we hit the end of the range.
        if (last_addr <= vma_last) {
            break;
        }

        // Advance to the next block.
        cur_addr = vma_end;
        iter = std::next(iter);
        ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end");
    }

    return MakeResult(mapped_size);
}

ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr address,
                                                                        std::size_t size) const {
    const VAddr end_addr = address + size;
    const VAddr last_addr = end_addr - 1;
    std::size_t mapped_size = 0;

    VAddr cur_addr = address;
    auto iter = FindVMA(cur_addr);
    ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end");

    while (true) {
        const auto& vma = iter->second;
        const auto vma_start = vma.base;
        const auto vma_end = vma_start + vma.size;
        const auto vma_last = vma_end - 1;
        const auto state = vma.state;
        const auto attr = vma.attribute;

        // Memory within region must be free or mapped heap.
        if (!((state == MemoryState::Heap && attr == MemoryAttribute::None) ||
              (state == MemoryState::Unmapped))) {
            return ERR_INVALID_ADDRESS_STATE;
        }

        // Add size if relevant.
        if (state != MemoryState::Unmapped) {
            mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr);
        }

        // Break once we hit the end of the range.
        if (last_addr <= vma_last) {
            break;
        }

        // Advance to the next block.
        cur_addr = vma_end;
        iter = std::next(iter);
        ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end");
    }

    return MakeResult(mapped_size);
}

u64 VMManager::GetTotalPhysicalMemoryAvailable() const {
    LOG_WARNING(Kernel, "(STUBBED) called");
    return 0xF8000000;

@@ -349,7 +349,8 @@ public:
     * @param state MemoryState tag to attach to the VMA.
     */
    ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block,
                                        std::size_t offset, u64 size, MemoryState state);
                                        std::size_t offset, u64 size, MemoryState state,
                                        VMAPermission perm = VMAPermission::ReadWrite);

    /**
     * Maps an unmanaged host memory pointer at a given address.

@@ -450,6 +451,34 @@ public:
    ///
    ResultVal<VAddr> SetHeapSize(u64 size);

    /// Maps memory at a given address.
    ///
    /// @param addr The virtual address to map memory at.
    /// @param size The amount of memory to map.
    ///
    /// @note The destination address must lie within the Map region.
    ///
    /// @note This function requires that SystemResourceSize be non-zero,
    ///       however, this is just because if it were not then the
    ///       resulting page tables could be exploited on hardware by
    ///       a malicious program. SystemResource usage does not need
    ///       to be explicitly checked or updated here.
    ResultCode MapPhysicalMemory(VAddr target, u64 size);

    /// Unmaps memory at a given address.
    ///
    /// @param addr The virtual address to unmap memory at.
    /// @param size The amount of memory to unmap.
    ///
    /// @note The destination address must lie within the Map region.
    ///
    /// @note This function requires that SystemResourceSize be non-zero,
    ///       however, this is just because if it were not then the
    ///       resulting page tables could be exploited on hardware by
    ///       a malicious program. SystemResource usage does not need
    ///       to be explicitly checked or updated here.
    ResultCode UnmapPhysicalMemory(VAddr target, u64 size);

    /// Maps a region of memory as code memory.
    ///
    /// @param dst_address The base address of the region to create the aliasing memory region.

@@ -657,6 +686,11 @@ private:
     */
    VMAIter MergeAdjacent(VMAIter vma);

    /**
     * Merges two adjacent VMAs.
     */
    void MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right);

    /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
    void UpdatePageTableForVMA(const VirtualMemoryArea& vma);

@@ -701,6 +735,13 @@ private:
                                 MemoryAttribute attribute_mask, MemoryAttribute attribute,
                                 MemoryAttribute ignore_mask) const;

    /// Gets the amount of memory currently mapped (state != Unmapped) in a range.
    ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const;

    /// Gets the amount of memory unmappable by UnmapPhysicalMemory in a range.
    ResultVal<std::size_t> SizeOfUnmappablePhysicalMemoryInRange(VAddr address,
                                                                 std::size_t size) const;

    /**
     * A map covering the entirety of the managed address space, keyed by the `base` field of each
     * VMA. It must always be modified by splitting or merging VMAs, so that the invariant

@@ -742,6 +783,11 @@ private:
    // end of the range. This is essentially 'base_address + current_size'.
    VAddr heap_end = 0;

    // The current amount of memory mapped via MapPhysicalMemory.
    // This is used here (and in Nintendo's kernel) only for debugging, and does not impact
    // any behavior.
    u64 physical_memory_mapped = 0;

    Core::System& system;
};
} // namespace Kernel

@@ -1,4 +1,5 @@
add_library(video_core STATIC
    buffer_cache.h
    dma_pusher.cpp
    dma_pusher.h
    debug_utils/debug_utils.cpp

@@ -43,8 +44,6 @@ add_library(video_core STATIC
    renderer_opengl/gl_device.h
    renderer_opengl/gl_framebuffer_cache.cpp
    renderer_opengl/gl_framebuffer_cache.h
    renderer_opengl/gl_global_cache.cpp
    renderer_opengl/gl_global_cache.h
    renderer_opengl/gl_rasterizer.cpp
    renderer_opengl/gl_rasterizer.h
    renderer_opengl/gl_resource_manager.cpp

src/video_core/buffer_cache.h (new file, 299 lines)
@@ -0,0 +1,299 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "common/alignment.h"
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_cache.h"

namespace VideoCore {
class RasterizerInterface;
}

namespace VideoCommon {

template <typename BufferStorageType>
class CachedBuffer final : public RasterizerCacheObject {
public:
    explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr)
        : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {}
    ~CachedBuffer() override = default;

    VAddr GetCpuAddr() const override {
        return cpu_addr;
    }

    std::size_t GetSizeInBytes() const override {
        return size;
    }

    u8* GetWritableHostPtr() const {
        return host_ptr;
    }

    std::size_t GetSize() const {
        return size;
    }

    std::size_t GetCapacity() const {
        return capacity;
    }

    bool IsInternalized() const {
        return is_internal;
    }

    const BufferStorageType& GetBuffer() const {
        return buffer;
    }

    void SetSize(std::size_t new_size) {
        size = new_size;
    }

    void SetInternalState(bool is_internal_) {
        is_internal = is_internal_;
    }

    BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) {
        capacity = new_capacity;
        std::swap(buffer, buffer_);
        return buffer_;
    }

private:
    u8* host_ptr{};
    VAddr cpu_addr{};
    std::size_t size{};
    std::size_t capacity{};
    bool is_internal{};
    BufferStorageType buffer;
};

template <typename BufferStorageType, typename BufferType, typename StreamBuffer>
class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> {
public:
    using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>;
    using BufferInfo = std::pair<const BufferType*, u64>;

    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
                         std::unique_ptr<StreamBuffer> stream_buffer)
        : RasterizerCache<Buffer>{rasterizer}, system{system},
          stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{
                                                       this->stream_buffer->GetHandle()} {}
    ~BufferCache() = default;

    void Unregister(const Buffer& entry) override {
        std::lock_guard lock{RasterizerCache<Buffer>::mutex};
        if (entry->IsInternalized()) {
            internalized_entries.erase(entry->GetCacheAddr());
        }
        ReserveBuffer(entry);
        RasterizerCache<Buffer>::Unregister(entry);
    }

    void TickFrame() {
        marked_for_destruction_index =
            (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size();
        MarkedForDestruction().clear();
    }

    BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
                            bool internalize = false, bool is_written = false) {
        std::lock_guard lock{RasterizerCache<Buffer>::mutex};

        auto& memory_manager = system.GPU().MemoryManager();
        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
        if (!host_ptr) {
            return {GetEmptyBuffer(size), 0};
        }
        const auto cache_addr = ToCacheAddr(host_ptr);

        // Cache management is a big overhead, so only cache entries with a given size.
        // TODO: Figure out which size is the best for given games.
        constexpr std::size_t max_stream_size = 0x800;
        if (!internalize && size < max_stream_size &&
            internalized_entries.find(cache_addr) == internalized_entries.end()) {
            return StreamBufferUpload(host_ptr, size, alignment);
        }

        auto entry = RasterizerCache<Buffer>::TryGet(cache_addr);
        if (!entry) {
            return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written);
        }

        if (entry->GetSize() < size) {
            IncreaseBufferSize(entry, size);
        }
        if (is_written) {
            entry->MarkAsModified(true, *this);
        }
        return {ToHandle(entry->GetBuffer()), 0};
    }

    /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
    BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
                                std::size_t alignment = 4) {
        std::lock_guard lock{RasterizerCache<Buffer>::mutex};
        return StreamBufferUpload(raw_pointer, size, alignment);
    }

    void Map(std::size_t max_size) {
        std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
        buffer_offset = buffer_offset_base;
    }

    /// Finishes the upload stream, returns true on bindings invalidation.
    bool Unmap() {
        stream_buffer->Unmap(buffer_offset - buffer_offset_base);
        return std::exchange(invalidated, false);
    }

    virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0;

protected:
    void FlushObjectInner(const Buffer& entry) override {
        DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr());
    }

    virtual BufferStorageType CreateBuffer(std::size_t size) = 0;

    virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0;

    virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset,
                                  std::size_t size, const u8* data) = 0;

    virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset,
                                    std::size_t size, u8* data) = 0;

    virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst,
                                std::size_t src_offset, std::size_t dst_offset,
                                std::size_t size) = 0;

private:
    BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
                                  std::size_t alignment) {
        AlignBuffer(alignment);
        const std::size_t uploaded_offset = buffer_offset;
        std::memcpy(buffer_ptr, raw_pointer, size);

        buffer_ptr += size;
        buffer_offset += size;
        return {&stream_buffer_handle, uploaded_offset};
    }

    BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size,
                                 bool internalize, bool is_written) {
        auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
        const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
        ASSERT(cpu_addr);

        auto entry = GetUncachedBuffer(*cpu_addr, host_ptr);
        entry->SetSize(size);
        entry->SetInternalState(internalize);
        RasterizerCache<Buffer>::Register(entry);

        if (internalize) {
            internalized_entries.emplace(ToCacheAddr(host_ptr));
        }
        if (is_written) {
            entry->MarkAsModified(true, *this);
        }

        if (entry->GetCapacity() < size) {
            MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size));
        }

        UploadBufferData(entry->GetBuffer(), 0, size, host_ptr);
        return {ToHandle(entry->GetBuffer()), 0};
    }

    void IncreaseBufferSize(Buffer& entry, std::size_t new_size) {
        const std::size_t old_size = entry->GetSize();
        if (entry->GetCapacity() < new_size) {
            const auto& old_buffer = entry->GetBuffer();
            auto new_buffer = CreateBuffer(new_size);

            // Copy bits from the old buffer to the new buffer.
            CopyBufferData(old_buffer, new_buffer, 0, 0, old_size);
            MarkedForDestruction().push_back(
                entry->ExchangeBuffer(std::move(new_buffer), new_size));

            // This buffer could have been used
            invalidated = true;
        }
        // Upload the new bits.
        const std::size_t size_diff = new_size - old_size;
        UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size);

        // Update entry's size in the object and in the cache.
        Unregister(entry);

        entry->SetSize(new_size);
        RasterizerCache<Buffer>::Register(entry);
    }

    Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) {
        if (auto entry = TryGetReservedBuffer(host_ptr)) {
            return entry;
        }
        return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr);
    }

    Buffer TryGetReservedBuffer(u8* host_ptr) {
        const auto it = buffer_reserve.find(ToCacheAddr(host_ptr));
        if (it == buffer_reserve.end()) {
            return {};
        }
        auto& reserve = it->second;
        auto entry = reserve.back();
        reserve.pop_back();
        return entry;
    }

    void ReserveBuffer(Buffer entry) {
        buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry));
    }

    void AlignBuffer(std::size_t alignment) {
        // Align the offset, not the mapped pointer
        const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
        buffer_ptr += offset_aligned - buffer_offset;
        buffer_offset = offset_aligned;
    }

    std::vector<BufferStorageType>& MarkedForDestruction() {
        return marked_for_destruction_ring_buffer[marked_for_destruction_index];
    }

    Core::System& system;

    std::unique_ptr<StreamBuffer> stream_buffer;
    BufferType stream_buffer_handle{};

    bool invalidated = false;

    u8* buffer_ptr = nullptr;
    u64 buffer_offset = 0;
    u64 buffer_offset_base = 0;

    std::size_t marked_for_destruction_index = 0;
    std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer;

    std::unordered_set<CacheAddr> internalized_entries;
    std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve;
};

} // namespace VideoCommon

@@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {
    MICROPROFILE_SCOPE(DispatchCalls);

    // On entering GPU code, assume all memory may be touched by the ARM core.
    gpu.Maxwell3D().dirty_flags.OnMemoryWrite();
    gpu.Maxwell3D().dirty.OnMemoryWrite();

    dma_pushbuffer_subindex = 0;

@@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
        const bool is_last_call = method_call.IsLastCall();
        upload_state.ProcessData(method_call.argument, is_last_call);
        if (is_last_call) {
            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
            system.GPU().Maxwell3D().dirty.OnMemoryWrite();
        }
        break;
    }

@@ -50,13 +50,14 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
}

void KeplerCompute::ProcessLaunch() {

    const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
    memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
                                   LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));

    const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
    LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
    const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
    LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);

    rasterizer.DispatchCompute(code_addr);
}

} // namespace Tegra::Engines

@@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
        const bool is_last_call = method_call.IsLastCall();
        upload_state.ProcessData(method_call.argument, is_last_call);
        if (is_last_call) {
            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
            system.GPU().Maxwell3D().dirty.OnMemoryWrite();
        }
        break;
    }

@@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
                     MemoryManager& memory_manager)
    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
      macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
+    InitDirtySettings();
    InitializeRegisterDefaults();
}

@@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
    regs.stencil_back_func_mask = 0xFFFFFFFF;
    regs.stencil_back_mask = 0xFFFFFFFF;

+    regs.depth_test_func = Regs::ComparisonOp::Always;
+    regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise;
+    regs.cull.cull_face = Regs::Cull::CullFace::Back;
+
    // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
    // register carrying a default value. Assume it's OpenGL's default (1).
    regs.point_size = 1.0f;
@@ -86,6 +91,159 @@ void Maxwell3D::InitializeRegisterDefaults() {
    regs.rt_separate_frag_data = 1;
}

#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))

void Maxwell3D::InitDirtySettings() {
    const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
        const auto start_itr = dirty_pointers.begin() + start;
        const auto end_itr = start_itr + range;
        std::fill(start_itr, end_itr, position);
    };
    dirty.regs.fill(true);

    // Init Render Targets
    constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
    constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
    constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
    u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
    for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
        set_block(rt_reg, registers_per_rt, rt_dirty_reg);
        rt_dirty_reg++;
    }
    constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
    dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
    dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
    dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
    constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
    constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
    set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);

    // Init Vertex Arrays
    constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
    constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
    constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
    u32 va_reg = DIRTY_REGS_POS(vertex_array);
    u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
    for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
         vertex_reg += vertex_array_size) {
        set_block(vertex_reg, 3, va_reg);
        // The divisor concerns vertex array instances
        dirty_pointers[vertex_reg + 3] = vi_reg;
        va_reg++;
        vi_reg++;
    }
    constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
    constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
    constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
    va_reg = DIRTY_REGS_POS(vertex_array);
    for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
         vertex_reg += vertex_limit_size) {
        set_block(vertex_reg, vertex_limit_size, va_reg);
        va_reg++;
    }
    constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
    constexpr u32 vertex_instance_size =
        sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
    constexpr u32 vertex_instance_end =
        vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
    vi_reg = DIRTY_REGS_POS(vertex_instance);
    for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
         vertex_reg += vertex_instance_size) {
        set_block(vertex_reg, vertex_instance_size, vi_reg);
        vi_reg++;
    }
    set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
              DIRTY_REGS_POS(vertex_attrib_format));

    // Init Shaders
    constexpr u32 shader_registers_count =
        sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
    set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
              DIRTY_REGS_POS(shaders));

    // State

    // Viewport
    constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
    constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
    constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
    set_block(viewport_start, viewport_size, viewport_dirty_reg);
    constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
    constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
    set_block(view_volume_start, view_volume_size, viewport_dirty_reg);

    // Viewport transformation
    constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
    constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
    set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));

    // Cullmode
    constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
    constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
    set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));

    // Screen y control
    dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);

    // Primitive Restart
    constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
    constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
    set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));

    // Depth Test
    constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
    dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;

    // Stencil Test
    constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;

    // Color Mask
    constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
    dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
    set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
              color_mask_dirty_reg);
    // Blend State
    constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
    set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
              blend_state_dirty_reg);
    dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
    set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
    set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
              blend_state_dirty_reg);

    // Scissor State
    constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
    set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
              scissor_test_dirty_reg);

    // Polygon Offset
    constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
}

void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
    // Reset the current macro.
    executing_macro = 0;
@@ -108,6 +266,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {

    const u32 method = method_call.method;

+    if (method == cb_data_state.current) {
+        regs.reg_array[method] = method_call.argument;
+        ProcessCBData(method_call.argument);
+        return;
+    } else if (cb_data_state.current != null_cb_data) {
+        FinishCBData();
+    }
+
    // It is an error to write to a register other than the current macro's ARG register before it
    // has finished execution.
    if (executing_macro != 0) {
@@ -143,49 +309,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {

    if (regs.reg_array[method] != method_call.argument) {
        regs.reg_array[method] = method_call.argument;
-        // Color buffers
-        constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
-        constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
-        if (method >= first_rt_reg &&
-            method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
-            const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
-            dirty_flags.color_buffer.set(rt_index);
-        }
-
-        // Zeta buffer
-        constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
-        if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
-            method == MAXWELL3D_REG_INDEX(zeta_width) ||
-            method == MAXWELL3D_REG_INDEX(zeta_height) ||
-            (method >= MAXWELL3D_REG_INDEX(zeta) &&
-             method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
-            dirty_flags.zeta_buffer = true;
-        }
-
-        // Shader
-        constexpr u32 shader_registers_count =
-            sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
-        if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
-            method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
-            dirty_flags.shaders = true;
-        }
-
-        // Vertex format
-        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
-            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
-            dirty_flags.vertex_attrib_format = true;
-        }
-
-        // Vertex buffer
-        if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
-            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
-            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
-        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
-                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
-            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
-        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
-                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
-            dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
+        const std::size_t dirty_reg = dirty_pointers[method];
+        if (dirty_reg) {
+            dirty.regs[dirty_reg] = true;
+            if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
+                dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
+                dirty.vertex_array_buffers = true;
+            } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
+                       dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
+                dirty.vertex_instances = true;
+            } else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
+                       dirty_reg < DIRTY_REGS_POS(render_settings)) {
+                dirty.render_settings = true;
+            }
+        }
    }
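
Register writes now resolve their dirty flag through one table lookup instead of a cascade of range checks: InitDirtySettings maps each register index to a flag slot, and CallMethod just indexes dirty_pointers. A scaled-down model of the two halves (array sizes shrunk for clarity; slot 0 is the null flag):

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>

constexpr std::size_t NUM_METHOD_REGS = 16;
constexpr std::size_t NUM_DIRTY_FLAGS = 4;

std::array<std::uint8_t, NUM_METHOD_REGS> dirty_pointers{}; // register -> flag slot (0 = none)
std::array<bool, NUM_DIRTY_FLAGS> dirty_regs{};

// Build time, like set_block above: point a block of registers at one flag.
void SetBlock(std::size_t start, std::size_t range, std::uint8_t flag) {
    std::fill(dirty_pointers.begin() + start, dirty_pointers.begin() + start + range, flag);
}

// Write time: a single lookup replaces the old chain of range comparisons.
void OnRegisterWrite(std::size_t method) {
    if (const std::uint8_t flag = dirty_pointers[method]; flag != 0) {
        dirty_regs[flag] = true;
    }
}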
@@ -214,7 +350,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
-        ProcessCBData(method_call.argument);
+        StartCBData(method);
        break;
    }
    case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
@@ -249,6 +385,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
        ProcessQueryGet();
        break;
    }
+    case MAXWELL3D_REG_INDEX(condition.mode): {
+        ProcessQueryCondition();
+        break;
+    }
    case MAXWELL3D_REG_INDEX(sync_info): {
        ProcessSyncPoint();
        break;
@@ -261,7 +401,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
        const bool is_last_call = method_call.IsLastCall();
        upload_state.ProcessData(method_call.argument, is_last_call);
        if (is_last_call) {
-            dirty_flags.OnMemoryWrite();
+            dirty.OnMemoryWrite();
        }
        break;
    }
@@ -302,6 +442,7 @@ void Maxwell3D::ProcessQueryGet() {
        result = regs.query.query_sequence;
        break;
    default:
+        result = 1;
        UNIMPLEMENTED_MSG("Unimplemented query select type {}",
                          static_cast<u32>(regs.query.query_get.select.Value()));
    }
@@ -333,7 +474,6 @@ void Maxwell3D::ProcessQueryGet() {
            query_result.timestamp = system.CoreTiming().GetTicks();
            memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
        }
-        dirty_flags.OnMemoryWrite();
        break;
    }
    default:
@@ -342,6 +482,45 @@ void Maxwell3D::ProcessQueryGet() {
    }
}

void Maxwell3D::ProcessQueryCondition() {
    const GPUVAddr condition_address{regs.condition.Address()};
    switch (regs.condition.mode) {
    case Regs::ConditionMode::Always: {
        execute_on = true;
        break;
    }
    case Regs::ConditionMode::Never: {
        execute_on = false;
        break;
    }
    case Regs::ConditionMode::ResNonZero: {
        Regs::QueryCompare cmp;
        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
        execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
        break;
    }
    case Regs::ConditionMode::Equal: {
        Regs::QueryCompare cmp;
        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
        execute_on =
            cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
        break;
    }
    case Regs::ConditionMode::NotEqual: {
        Regs::QueryCompare cmp;
        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
        execute_on =
            cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
        break;
    }
    default: {
        UNIMPLEMENTED_MSG("Unimplemented Condition Mode!");
        execute_on = true;
        break;
    }
    }
}

void Maxwell3D::ProcessSyncPoint() {
    const u32 sync_point = regs.sync_info.sync_point.Value();
    const u32 increment = regs.sync_info.increment.Value();
@@ -405,23 +584,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
}

void Maxwell3D::ProcessCBData(u32 value) {
+    const u32 id = cb_data_state.id;
+    cb_data_state.buffer[id][cb_data_state.counter] = value;
    // Increment the current buffer position.
    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
+    cb_data_state.counter++;
}

+void Maxwell3D::StartCBData(u32 method) {
+    constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
+    cb_data_state.start_pos = regs.const_buffer.cb_pos;
+    cb_data_state.id = method - first_cb_data;
+    cb_data_state.current = method;
+    cb_data_state.counter = 0;
+    ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
+}
+
+void Maxwell3D::FinishCBData() {
    // Write the input value to the current const buffer at the current position.
    const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
    ASSERT(buffer_address != 0);

    // Don't allow writing past the end of the buffer.
-    ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
+    ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);

-    const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+    const GPUVAddr address{buffer_address + cb_data_state.start_pos};
+    const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;

-    u8* ptr{memory_manager.GetPointer(address)};
-    rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
-    memory_manager.Write<u32>(address, value);
+    const u32 id = cb_data_state.id;
+    memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
+    dirty.OnMemoryWrite();

-    dirty_flags.OnMemoryWrite();
-
-    // Increment the current buffer position.
-    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
+    cb_data_state.id = null_cb_data;
+    cb_data_state.current = null_cb_data;
}
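
The CB_DATA path above turns a per-word write (invalidate plus Write<u32> for every value) into a staged upload: StartCBData opens a batch, ProcessCBData appends to a local buffer, and FinishCBData commits everything with one WriteBlock. A toy model of that batching, with plain integers standing in for the GPU registers:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

class ConstBufferUploader {
public:
    void Start(std::uint32_t pos) {
        start_pos = pos;
        cur_pos = pos;
        staged.clear();
    }

    void Push(std::uint32_t value) { // mirrors ProcessCBData
        staged.push_back(value);
        cur_pos += sizeof(std::uint32_t);
    }

    // Mirrors FinishCBData: one block write instead of N word writes.
    void Finish(std::vector<std::uint8_t>& memory) {
        const std::size_t size = cur_pos - start_pos;
        std::memcpy(memory.data() + start_pos, staged.data(), size);
    }

private:
    std::uint32_t start_pos = 0;
    std::uint32_t cur_pos = 0;
    std::vector<std::uint32_t> staged;
};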
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {

@@ -67,6 +67,7 @@ public:
    static constexpr std::size_t MaxShaderStage = 5;
    // Maximum number of const buffers per shader stage.
    static constexpr std::size_t MaxConstBuffers = 18;
+    static constexpr std::size_t MaxConstBufferSize = 0x10000;

    enum class QueryMode : u32 {
        Write = 0,
@@ -89,6 +90,20 @@ public:

    enum class QuerySelect : u32 {
        Zero = 0,
+        TimeElapsed = 2,
+        TransformFeedbackPrimitivesGenerated = 11,
+        PrimitivesGenerated = 18,
+        SamplesPassed = 21,
+        TransformFeedbackUnknown = 26,
    };

+    struct QueryCompare {
+        u32 initial_sequence;
+        u32 initial_mode;
+        u32 unknown1;
+        u32 unknown2;
+        u32 current_sequence;
+        u32 current_mode;
+    };

    enum class QuerySyncCondition : u32 {
@@ -96,6 +111,14 @@ public:
        GreaterThan = 1,
    };

+    enum class ConditionMode : u32 {
+        Never = 0,
+        Always = 1,
+        ResNonZero = 2,
+        Equal = 3,
+        NotEqual = 4,
+    };
+
    enum class ShaderProgram : u32 {
        VertexA = 0,
        VertexB = 1,
@@ -814,7 +837,18 @@ public:
            BitField<4, 1, u32> alpha_to_one;
        } multisample_control;

-        INSERT_PADDING_WORDS(0x7);
+        INSERT_PADDING_WORDS(0x4);

+        struct {
+            u32 address_high;
+            u32 address_low;
+            ConditionMode mode;
+
+            GPUVAddr Address() const {
+                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                             address_low);
+            }
+        } condition;

        struct {
            u32 tsc_address_high;
@@ -1123,23 +1157,77 @@ public:

    State state{};

-    struct DirtyFlags {
-        std::bitset<8> color_buffer{0xFF};
-        std::bitset<32> vertex_array{0xFFFFFFFF};
+    struct DirtyRegs {
+        static constexpr std::size_t NUM_REGS = 256;
+        union {
+            struct {
+                bool null_dirty;

-        bool vertex_attrib_format = true;
-        bool zeta_buffer = true;
-        bool shaders = true;
+                // Vertex Attributes
+                bool vertex_attrib_format;

+                // Vertex Arrays
+                std::array<bool, 32> vertex_array;

+                bool vertex_array_buffers;

+                // Vertex Instances
+                std::array<bool, 32> vertex_instance;

+                bool vertex_instances;

+                // Render Targets
+                std::array<bool, 8> render_target;
+                bool depth_buffer;

+                bool render_settings;

+                // Shaders
+                bool shaders;

+                // Rasterizer State
+                bool viewport;
+                bool clip_coefficient;
+                bool cull_mode;
+                bool primitive_restart;
+                bool depth_test;
+                bool stencil_test;
+                bool blend_state;
+                bool scissor_test;
+                bool transform_feedback;
+                bool color_mask;
+                bool polygon_offset;

+                // Complementary
+                bool viewport_transform;
+                bool screen_y_control;

+                bool memory_general;
+            };
+            std::array<bool, NUM_REGS> regs;
+        };

+        void ResetVertexArrays() {
+            vertex_array.fill(true);
+            vertex_array_buffers = true;
+        }

+        void ResetRenderTargets() {
+            depth_buffer = true;
+            render_target.fill(true);
+            render_settings = true;
+        }

        void OnMemoryWrite() {
-            zeta_buffer = true;
            shaders = true;
-            color_buffer.set();
-            vertex_array.set();
+            memory_general = true;
+            ResetRenderTargets();
+            ResetVertexArrays();
        }
-    };

-    DirtyFlags dirty_flags;
+    } dirty{};

+    std::array<u8, Regs::NUM_REGS> dirty_pointers{};
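
DirtyRegs overlays its named flags with a flat regs array through an anonymous union, so table-driven code can write dirty.regs[i] while consumers read named members like dirty.cull_mode. A reduced sketch of the aliasing; note that anonymous structs inside unions are a widely supported compiler extension, and writing one union member then reading another relies on compiler-tolerated type punning rather than strict ISO C++:

#include <array>
#include <cstddef>
#include <cstdio>

struct Dirty {
    static constexpr std::size_t NUM_REGS = 4;
    union {
        struct {
            bool null_dirty; // slot 0: writes that map to no flag land here
            bool viewport;   // slot 1
            bool cull_mode;  // slot 2
            bool depth_test; // slot 3
        };
        std::array<bool, NUM_REGS> regs;
    };
};
static_assert(sizeof(std::array<bool, Dirty::NUM_REGS>) == Dirty::NUM_REGS);

int main() {
    Dirty dirty{};
    dirty.regs[2] = true;  // generic, table-driven write...
    if (dirty.cull_mode) { // ...observed through the named alias
        std::puts("cull mode needs resyncing");
    }
}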

    /// Reads a register value located at the input method address
    u32 GetRegisterValue(u32 method) const;
@@ -1168,6 +1256,10 @@ public:
        return macro_memory;
    }

+    bool ShouldExecute() const {
+        return execute_on;
+    }
+
private:
    void InitializeRegisterDefaults();

@@ -1191,14 +1283,27 @@ private:
    /// Interpreter for the macro codes uploaded to the GPU.
    MacroInterpreter macro_interpreter;

+    static constexpr u32 null_cb_data = 0xFFFFFFFF;
+    struct {
+        std::array<std::array<u32, 0x4000>, 16> buffer;
+        u32 current{null_cb_data};
+        u32 id{null_cb_data};
+        u32 start_pos{};
+        u32 counter{};
+    } cb_data_state;

    Upload::State upload_state;

+    bool execute_on{true};

    /// Retrieves information about a specific TIC entry from the TIC buffer.
    Texture::TICEntry GetTICEntry(u32 tic_index) const;

    /// Retrieves information about a specific TSC entry from the TSC buffer.
    Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;

+    void InitDirtySettings();

    /**
     * Call a macro on this engine.
     * @param method Method to call
@@ -1218,11 +1323,16 @@ private:
    /// Handles a write to the QUERY_GET register.
    void ProcessQueryGet();

+    // Handles Conditional Rendering
+    void ProcessQueryCondition();
+
    /// Handles writes to syncing register.
    void ProcessSyncPoint();

    /// Handles a write to the CB_DATA[i] register.
+    void StartCBData(u32 method);
    void ProcessCBData(u32 value);
+    void FinishCBData();

    /// Handles a write to the CB_BIND register.
    void ProcessCBBind(Regs::ShaderStage stage);
@@ -1289,6 +1399,7 @@ ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
ASSERT_REG_POSITION(point_size, 0x546);
ASSERT_REG_POSITION(zeta_enable, 0x54E);
ASSERT_REG_POSITION(multisample_control, 0x54F);
+ASSERT_REG_POSITION(condition, 0x554);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(polygon_offset_factor, 0x55b);
ASSERT_REG_POSITION(tic, 0x55D);

@@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() {
    }

    // All copies here update the main memory, so mark all rasterizer states as invalid.
-    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+    system.GPU().Maxwell3D().dirty.OnMemoryWrite();

    if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
        // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
@@ -78,7 +78,7 @@ union Attribute {
    constexpr explicit Attribute(u64 value) : value(value) {}

    enum class Index : u64 {
-        PointSize = 6,
+        LayerViewportPointSize = 6,
        Position = 7,
        Attribute_0 = 8,
        Attribute_31 = 39,
@@ -931,8 +931,6 @@ union Instruction {
    } csetp;

    union {
-        BitField<35, 4, PredCondition> cond;
-        BitField<49, 1, u64> h_and;
        BitField<6, 1, u64> ftz;
        BitField<45, 2, PredOperation> op;
        BitField<3, 3, u64> pred3;
@@ -940,9 +938,21 @@ union Instruction {
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, HalfType> type_a;
-        BitField<31, 1, u64> negate_b;
-        BitField<30, 1, u64> abs_b;
-        BitField<28, 2, HalfType> type_b;
+        union {
+            BitField<35, 4, PredCondition> cond;
+            BitField<49, 1, u64> h_and;
+            BitField<31, 1, u64> negate_b;
+            BitField<30, 1, u64> abs_b;
+            BitField<28, 2, HalfType> type_b;
+        } reg;
+        union {
+            BitField<56, 1, u64> negate_b;
+            BitField<54, 1, u64> abs_b;
+        } cbuf;
+        union {
+            BitField<49, 4, PredCondition> cond;
+            BitField<53, 1, u64> h_and;
+        } cbuf_and_imm;
        BitField<42, 1, u64> neg_pred;
        BitField<39, 3, u64> pred39;
    } hsetp2;
@@ -1548,7 +1558,9 @@ public:
        HFMA2_RC,
        HFMA2_RR,
        HFMA2_IMM_R,
+        HSETP2_C,
        HSETP2_R,
+        HSETP2_IMM,
        HSET2_R,
        POPC_C,
        POPC_R,
@@ -1831,7 +1843,9 @@ private:
        INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
        INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
        INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
-        INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"),
+        INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
+        INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
+        INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
        INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
        INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
        INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
@@ -31,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {

GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
    auto& rasterizer{renderer.Rasterizer()};
-    memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer);
+    memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
    dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
    fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
@@ -50,6 +50,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const {
    return *maxwell_3d;
}

+Engines::KeplerCompute& GPU::KeplerCompute() {
+    return *kepler_compute;
+}
+
+const Engines::KeplerCompute& GPU::KeplerCompute() const {
+    return *kepler_compute;
+}
+
MemoryManager& GPU::MemoryManager() {
    return *memory_manager;
}

@@ -155,6 +155,12 @@ public:
    /// Returns a const reference to the Maxwell3D GPU engine.
    const Engines::Maxwell3D& Maxwell3D() const;

+    /// Returns a reference to the KeplerCompute GPU engine.
+    Engines::KeplerCompute& KeplerCompute();
+
+    /// Returns a const reference to the KeplerCompute GPU engine.
+    const Engines::KeplerCompute& KeplerCompute() const;
+
    /// Returns a reference to the GPU memory manager.
    Tegra::MemoryManager& MemoryManager();

@@ -4,14 +4,18 @@

#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/macro_interpreter.h"

+MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
+
namespace Tegra {

MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}

void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
+    MICROPROFILE_SCOPE(MacroInterp);
    Reset();
    registers[1] = parameters[0];
    this->parameters = std::move(parameters);
@@ -5,13 +5,17 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/vm_manager.h"
#include "core/memory.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"

namespace Tegra {

-MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {
+MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
+    : rasterizer{rasterizer}, system{system} {
    std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
    std::fill(page_table.attributes.begin(), page_table.attributes.end(),
              Common::PageType::Unmapped);
@@ -49,6 +53,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};

    MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
+    ASSERT(system.CurrentProcess()
+               ->VMManager()
+               .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
+                                   Kernel::MemoryAttribute::DeviceMapped)
+               .IsSuccess());

    return gpu_addr;
}
@@ -59,7 +68,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)
    const u64 aligned_size{Common::AlignUp(size, page_size)};

    MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);

+    ASSERT(system.CurrentProcess()
+               ->VMManager()
+               .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
+                                   Kernel::MemoryAttribute::DeviceMapped)
+               .IsSuccess());
    return gpu_addr;
}

@@ -68,9 +81,16 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {

    const u64 aligned_size{Common::AlignUp(size, page_size)};
    const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
+    const auto cpu_addr = GpuToCpuAddress(gpu_addr);
+    ASSERT(cpu_addr);

    rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
    UnmapRange(gpu_addr, aligned_size);
+    ASSERT(system.CurrentProcess()
+               ->VMManager()
+               .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped,
+                                   Kernel::MemoryAttribute::None)
+               .IsSuccess());

    return gpu_addr;
}
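
Map and unmap are now symmetric with respect to the DeviceMapped attribute: MapBufferEx tags the guest range, UnmapBuffer flushes the cached data and clears the tag. A usage sketch of that pairing (the caller and its arguments are illustrative, not part of this change):

// Hypothetical caller, e.g. a service handler that owns a guest buffer.
void MapThenUnmap(Tegra::MemoryManager& memory_manager, VAddr cpu_addr, u64 size) {
    // Mapping marks the range Kernel::MemoryAttribute::DeviceMapped, so the
    // kernel rejects guest operations that would move or free it meanwhile.
    const GPUVAddr gpu_addr = memory_manager.MapBufferEx(cpu_addr, size);

    // ... submit GPU work referencing gpu_addr ...

    // Unmapping flushes and invalidates the cached range, then clears the
    // attribute, returning the pages to normal guest control.
    memory_manager.UnmapBuffer(gpu_addr, size);
}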
@@ -14,6 +14,10 @@ namespace VideoCore {
class RasterizerInterface;
}

+namespace Core {
+class System;
+}
+
namespace Tegra {

/**
@@ -47,7 +51,7 @@ struct VirtualMemoryArea {

class MemoryManager final {
public:
-    explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer);
+    explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
    ~MemoryManager();

    GPUVAddr AllocateSpace(u64 size, u64 align);
@@ -173,6 +177,8 @@ private:
    Common::PageTable page_table{page_bits};
    VMAMap vma_map;
    VideoCore::RasterizerInterface& rasterizer;

+    Core::System& system;
};

} // namespace Tegra

@@ -34,6 +34,9 @@ public:
    /// Clear the current framebuffer
    virtual void Clear() = 0;

+    /// Dispatches a compute shader invocation
+    virtual void DispatchCompute(GPUVAddr code_addr) = 0;
+
    /// Notify rasterizer that all caches should be flushed to Switch memory
    virtual void FlushAll() = 0;

@@ -47,6 +50,9 @@ public:
    /// and invalidated
    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;

+    /// Notify rasterizer that a frame is about to finish
+    virtual void TickFrame() = 0;
+
    /// Attempt to use a faster method to perform a surface copy
    virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                       const Tegra::Engines::Fermi2D::Regs::Surface& dst,
@@ -2,103 +2,57 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

-#include <cstring>
-#include <memory>
-
-#include "common/alignment.h"
-#include "core/core.h"
-#include "video_core/memory_manager.h"
+#include <glad/glad.h>

+#include "common/assert.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"

namespace OpenGL {

-CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
-                                     std::size_t alignment, u8* host_ptr)
-    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
-      alignment{alignment} {}
+OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
+                               std::size_t stream_size)
+    : VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{
+          rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}

-OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
-    : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
OGLBufferCache::~OGLBufferCache() = default;

-GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
-                                      bool cache) {
-    std::lock_guard lock{mutex};
-    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-
-    // Cache management is a big overhead, so only cache entries with a given size.
-    // TODO: Figure out which size is the best for given games.
-    cache &= size >= 2048;
-
-    const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
-    if (cache) {
-        auto entry = TryGet(host_ptr);
-        if (entry) {
-            if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
-                return entry->GetOffset();
-            }
-            Unregister(entry);
-        }
-    }
-
-    AlignBuffer(alignment);
-    const GLintptr uploaded_offset = buffer_offset;
-
-    if (!host_ptr) {
-        return uploaded_offset;
-    }
-
-    std::memcpy(buffer_ptr, host_ptr, size);
-    buffer_ptr += size;
-    buffer_offset += size;
-
-    if (cache) {
-        auto entry = std::make_shared<CachedBufferEntry>(
-            *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
-        Register(entry);
-    }
-
-    return uploaded_offset;
+OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) {
+    OGLBuffer buffer;
+    buffer.Create();
+    glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
+    return buffer;
}

-GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size,
-                                          std::size_t alignment) {
-    std::lock_guard lock{mutex};
-    AlignBuffer(alignment);
-    std::memcpy(buffer_ptr, raw_pointer, size);
-    const GLintptr uploaded_offset = buffer_offset;
-
-    buffer_ptr += size;
-    buffer_offset += size;
-    return uploaded_offset;
+const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) {
+    return &buffer.handle;
}

-bool OGLBufferCache::Map(std::size_t max_size) {
-    bool invalidate;
-    std::tie(buffer_ptr, buffer_offset_base, invalidate) =
-        stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
-    buffer_offset = buffer_offset_base;
-
-    if (invalidate) {
-        InvalidateAll();
-    }
-    return invalidate;
+const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
+    static const GLuint null_buffer = 0;
+    return &null_buffer;
}

-void OGLBufferCache::Unmap() {
-    stream_buffer.Unmap(buffer_offset - buffer_offset_base);
+void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
+                                      const u8* data) {
+    glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+                         static_cast<GLsizeiptr>(size), data);
}

-GLuint OGLBufferCache::GetHandle() const {
-    return stream_buffer.GetHandle();
+void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset,
+                                        std::size_t size, u8* data) {
+    glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+                            static_cast<GLsizeiptr>(size), data);
}

-void OGLBufferCache::AlignBuffer(std::size_t alignment) {
-    // Align the offset, not the mapped pointer
-    const GLintptr offset_aligned =
-        static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment));
-    buffer_ptr += offset_aligned - buffer_offset;
-    buffer_offset = offset_aligned;
+void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst,
+                                    std::size_t src_offset, std::size_t dst_offset,
+                                    std::size_t size) {
+    glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset),
+                             static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
}

} // namespace OpenGL
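
After this rewrite the OpenGL file only supplies backend primitives; caching policy lives in the generic VideoCommon::BufferCache. A skeletal illustration of that split, with a stand-in base class (the real template carries more hooks and state):

#include <cstddef>
#include <cstdint>

// Stand-in for the generic cache: it decides *when* to create, upload,
// download or copy, and calls these hooks to do the backend work.
template <typename BufferT>
class GenericBufferCache {
protected:
    virtual ~GenericBufferCache() = default;
    virtual BufferT CreateBuffer(std::size_t size) = 0;
    virtual void UploadBufferData(const BufferT& buffer, std::size_t offset,
                                  std::size_t size, const std::uint8_t* data) = 0;
    virtual void DownloadBufferData(const BufferT& buffer, std::size_t offset,
                                    std::size_t size, std::uint8_t* data) = 0;
};

// A backend only translates each hook into its own API, exactly as
// OGLBufferCache does with glNamedBuffer* calls above.
class NullBufferCache final : public GenericBufferCache<int> {
    int CreateBuffer(std::size_t) override { return 0; }
    void UploadBufferData(const int&, std::size_t, std::size_t,
                          const std::uint8_t*) override {}
    void DownloadBufferData(const int&, std::size_t, std::size_t,
                            std::uint8_t*) override {}
};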
@@ -4,80 +4,44 @@

#pragma once

#include <cstddef>
#include <memory>
-#include <tuple>

#include "common/common_types.h"
+#include "video_core/buffer_cache.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"

+namespace Core {
+class System;
+}
+
namespace OpenGL {

+class OGLStreamBuffer;
class RasterizerOpenGL;

-class CachedBufferEntry final : public RasterizerCacheObject {
+class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> {
public:
-    explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
-                               std::size_t alignment, u8* host_ptr);
+    explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
+                            std::size_t stream_size);
+    ~OGLBufferCache();

-    VAddr GetCpuAddr() const override {
-        return cpu_addr;
-    }
-
-    std::size_t GetSizeInBytes() const override {
-        return size;
-    }
-
-    std::size_t GetSize() const {
-        return size;
-    }
-
-    GLintptr GetOffset() const {
-        return offset;
-    }
-
-    std::size_t GetAlignment() const {
-        return alignment;
-    }
-
-private:
-    VAddr cpu_addr{};
-    std::size_t size{};
-    GLintptr offset{};
-    std::size_t alignment{};
-};
-
-class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
-public:
-    explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size);
-
-    /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
-    /// allocated.
-    GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
-                          bool cache = true);
-
-    /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
-    GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
-
-    bool Map(std::size_t max_size);
-    void Unmap();
-
-    GLuint GetHandle() const;
+    const GLuint* GetEmptyBuffer(std::size_t) override;

protected:
-    void AlignBuffer(std::size_t alignment);
+    OGLBuffer CreateBuffer(std::size_t size) override;

-    // We do not have to flush this cache as things in it are never modified by us.
-    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+    const GLuint* ToHandle(const OGLBuffer& buffer) override;

-private:
-    OGLStreamBuffer stream_buffer;
+    void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
+                          const u8* data) override;

-    u8* buffer_ptr = nullptr;
-    GLintptr buffer_offset = 0;
-    GLintptr buffer_offset_base = 0;
+    void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
+                            u8* data) override;

+    void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset,
+                        std::size_t dst_offset, std::size_t size) override;
};

} // namespace OpenGL
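
For callers, the visible change is that UploadMemory and UploadHostMemory now return a (buffer, offset) pair rather than a bare stream-buffer offset, since an upload may land in any backend buffer. A sketch of the shape of a migrated call site, modeled on the rasterizer hunks later in this diff; the GPUVAddr alias and the assumption that the pair decomposes to a raw handle and an offset are for self-containment only:

#include <cstddef>
#include <cstdint>
#include <glad/glad.h>

using GPUVAddr = std::uint64_t; // alias for illustration

// BufferCache is any type exposing the pair-returning UploadMemory above.
template <typename BufferCache>
void BindGuestVertexBuffer(BufferCache& buffer_cache, GLuint vao, GLuint index,
                           GPUVAddr gpu_addr, std::size_t size, GLsizei stride) {
    // Previously: a GLintptr offset into one global stream buffer.
    // Now: the upload also names the destination buffer.
    const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size);
    glVertexArrayVertexBuffer(vao, index, buffer, offset, stride);
}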
@@ -24,8 +24,10 @@ T GetInteger(GLenum pname) {

Device::Device() {
    uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
+    shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
    max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
    max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
+    has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
    has_variable_aoffi = TestVariableAoffi();
    has_component_indexing_bug = TestComponentIndexingBug();
}
@@ -34,6 +36,7 @@ Device::Device(std::nullptr_t) {
    uniform_buffer_alignment = 0;
    max_vertex_attributes = 16;
    max_varyings = 15;
+    has_vertex_viewport_layer = true;
    has_variable_aoffi = true;
    has_component_indexing_bug = false;
}

@@ -18,6 +18,10 @@ public:
        return uniform_buffer_alignment;
    }

+    std::size_t GetShaderStorageBufferAlignment() const {
+        return shader_storage_alignment;
+    }
+
    u32 GetMaxVertexAttributes() const {
        return max_vertex_attributes;
    }
@@ -26,6 +30,10 @@ public:
        return max_varyings;
    }

+    bool HasVertexViewportLayer() const {
+        return has_vertex_viewport_layer;
+    }
+
    bool HasVariableAoffi() const {
        return has_variable_aoffi;
    }
@@ -39,8 +47,10 @@ private:
    static bool TestComponentIndexingBug();

    std::size_t uniform_buffer_alignment{};
+    std::size_t shader_storage_alignment{};
    u32 max_vertex_attributes{};
    u32 max_varyings{};
+    bool has_vertex_viewport_layer{};
    bool has_variable_aoffi{};
    bool has_component_indexing_bug{};
};
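
Device reads its limits through the GetInteger<T> helper named in the first hunk header above; the helper's body is outside this diff, so the following is an assumed, typical implementation of such a wrapper:

#include <glad/glad.h>

namespace {

// Assumed implementation: query one integer limit, cast to the caller's type.
template <typename T>
T GetInteger(GLenum pname) {
    GLint temporary;
    glGetIntegerv(pname, &temporary);
    return static_cast<T>(temporary);
}

} // namespace

// Example, as used by Device::Device():
// shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);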
@@ -1,102 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <glad/glad.h>

#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/utils.h"

namespace OpenGL {

CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size)
    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size},
      max_size{max_size} {
    buffer.Create();
    LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
}

CachedGlobalRegion::~CachedGlobalRegion() = default;

void CachedGlobalRegion::Reload(u32 size_) {
    size = size_;
    if (size > max_size) {
        size = max_size;
        LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_,
                     max_size);
    }
    glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW);
}

void CachedGlobalRegion::Flush() {
    LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr);
    glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr);
}

GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
    const auto search{reserve.find(addr)};
    if (search == reserve.end()) {
        return {};
    }
    return search->second;
}

GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr,
                                                              u32 size) {
    GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
    if (!region) {
        // No reserved surface available, create a new one and reserve it
        auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
        const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)};
        ASSERT(cpu_addr);

        region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size);
        ReserveGlobalRegion(region);
    }
    region->Reload(size);
    return region;
}

void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
    reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
}

GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
    : RasterizerCache{rasterizer} {
    GLint max_ssbo_size_;
    glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_);
    max_ssbo_size = static_cast<u32>(max_ssbo_size_);
}

GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
    const GLShader::GlobalMemoryEntry& global_region,
    Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
    std::lock_guard lock{mutex};

    auto& gpu{Core::System::GetInstance().GPU()};
    auto& memory_manager{gpu.MemoryManager()};
    const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
    const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
                    global_region.GetCbufOffset()};
    const auto actual_addr{memory_manager.Read<u64>(addr)};
    const auto size{memory_manager.Read<u32>(addr + 8)};

    // Look up global region in the cache based on address
    const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
    GlobalRegion region{TryGet(host_ptr)};

    if (!region) {
        // No global region found - create a new one
        region = GetUncachedGlobalRegion(actual_addr, host_ptr, size);
        Register(region);
    }

    return region;
}

} // namespace OpenGL
@@ -1,82 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <memory>
#include <unordered_map>

#include <glad/glad.h>

#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"

namespace OpenGL {

namespace GLShader {
class GlobalMemoryEntry;
}

class RasterizerOpenGL;
class CachedGlobalRegion;
using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;

class CachedGlobalRegion final : public RasterizerCacheObject {
public:
    explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size);
    ~CachedGlobalRegion();

    VAddr GetCpuAddr() const override {
        return cpu_addr;
    }

    std::size_t GetSizeInBytes() const override {
        return size;
    }

    /// Gets the GL program handle for the buffer
    GLuint GetBufferHandle() const {
        return buffer.handle;
    }

    /// Reloads the global region from guest memory
    void Reload(u32 size_);

    void Flush();

private:
    VAddr cpu_addr{};
    u8* host_ptr{};
    u32 size{};
    u32 max_size{};

    OGLBuffer buffer;
};

class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
public:
    explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);

    /// Gets the current specified shader stage program
    GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
                                 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);

protected:
    void FlushObjectInner(const GlobalRegion& object) override {
        object->Flush();
    }

private:
    GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
    GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
    void ReserveGlobalRegion(GlobalRegion region);

    std::unordered_map<CacheAddr, GlobalRegion> reserve;
    u32 max_ssbo_size{};
};

} // namespace OpenGL
@@ -4,6 +4,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
@@ -19,7 +20,9 @@
|
||||
#include "core/core.h"
|
||||
#include "core/hle/kernel/process.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
||||
@@ -80,16 +83,31 @@ struct DrawParameters {
|
||||
}
|
||||
};
|
||||
|
||||
static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const GLShader::ConstBufferEntry& entry) {
|
||||
if (!entry.IsIndirect()) {
|
||||
return entry.GetSize();
|
||||
}
|
||||
|
||||
if (buffer.size > Maxwell::MaxConstBufferSize) {
|
||||
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
|
||||
Maxwell::MaxConstBufferSize);
|
||||
return Maxwell::MaxConstBufferSize;
|
||||
}
|
||||
|
||||
return buffer.size;
|
||||
}
|
||||
|
||||
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
||||
ScreenInfo& info)
|
||||
: texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
|
||||
global_cache{*this}, system{system}, screen_info{info},
|
||||
buffer_cache(*this, STREAM_BUFFER_SIZE) {
|
||||
system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} {
|
||||
OpenGLState::ApplyDefaultState();
|
||||
|
||||
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
|
||||
state.draw.shader_program = 0;
|
||||
state.Apply();
|
||||
clear_framebuffer.Create();
|
||||
|
||||
LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
|
||||
CheckExtensions();
|
||||
@@ -109,10 +127,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
|
||||
auto& gpu = system.GPU().Maxwell3D();
|
||||
const auto& regs = gpu.regs;
|
||||
|
||||
if (!gpu.dirty_flags.vertex_attrib_format) {
|
||||
if (!gpu.dirty.vertex_attrib_format) {
|
||||
return state.draw.vertex_array;
|
||||
}
|
||||
gpu.dirty_flags.vertex_attrib_format = false;
|
||||
gpu.dirty.vertex_attrib_format = false;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_VAO);
|
||||
|
||||
@@ -129,8 +147,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
|
||||
state.draw.vertex_array = vao;
|
||||
state.ApplyVertexArrayState();
|
||||
|
||||
glVertexArrayElementBuffer(vao, buffer_cache.GetHandle());
|
||||
|
||||
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
|
||||
// Enables the first 16 vertex attributes always, as we don't know which ones are actually
|
||||
// used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
@@ -168,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
}

// Rebinding the VAO invalidates the vertex buffer bindings.
gpu.dirty_flags.vertex_array.set();
gpu.dirty.ResetVertexArrays();

state.draw.vertex_array = vao_entry.handle;
return vao_entry.handle;
@@ -176,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {

void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;

if (gpu.dirty_flags.vertex_array.none())
if (!gpu.dirty.vertex_array_buffers)
return;
gpu.dirty.vertex_array_buffers = false;

const auto& regs = gpu.regs;

MICROPROFILE_SCOPE(OpenGL_VB);

// Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (!gpu.dirty_flags.vertex_array[index])
if (!gpu.dirty.vertex_array[index])
continue;
gpu.dirty.vertex_array[index] = false;
gpu.dirty.vertex_instance[index] = false;

const auto& vertex_array = regs.vertex_array[index];
if (!vertex_array.IsEnabled())
@@ -197,11 +216,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {

ASSERT(end > start);
const u64 size = end - start + 1;
const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size);
const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);

// Bind the vertex array to the buffer at the current offset.
glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset,
vertex_array.stride);
vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset,
vertex_array.stride);

if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
// Enable vertex buffer instancing with the specified divisor.
@@ -211,11 +230,47 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
glVertexArrayBindingDivisor(vao, index, 0);
}
}

gpu.dirty_flags.vertex_array.reset();
}

DrawParameters RasterizerOpenGL::SetupDraw() {
void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
auto& gpu = system.GPU().Maxwell3D();

if (!gpu.dirty.vertex_instances)
return;
gpu.dirty.vertex_instances = false;

const auto& regs = gpu.regs;
// Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (!gpu.dirty.vertex_instance[index])
continue;

gpu.dirty.vertex_instance[index] = false;

if (regs.instanced_arrays.IsInstancingEnabled(index) &&
regs.vertex_array[index].divisor != 0) {
// Enable vertex buffer instancing with the specified divisor.
glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
} else {
// Disable the vertex buffer instancing.
glVertexArrayBindingDivisor(vao, index, 0);
}
}
}
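
Both branches above funnel into glVertexArrayBindingDivisor, whose semantics carry the whole instancing story: a divisor of N advances the vertex buffer binding once every N instances, and 0 restores ordinary per-vertex stepping. A minimal sketch, assuming a valid vao and binding index:

    glVertexArrayBindingDivisor(vao, index, 4); // advance this binding once every 4 instances
    glVertexArrayBindingDivisor(vao, index, 0); // plain per-vertex advancement again
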

GLintptr RasterizerOpenGL::SetupIndexBuffer() {
if (accelerate_draw != AccelDraw::Indexed) {
return 0;
}
MICROPROFILE_SCOPE(OpenGL_Index);
const auto& regs = system.GPU().Maxwell3D().regs;
const std::size_t size = CalculateIndexBufferSize();
const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
vertex_array_pushbuffer.SetIndexBuffer(buffer);
return offset;
}

DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) {
const auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
@@ -227,11 +282,9 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);

if (is_indexed) {
MICROPROFILE_SCOPE(OpenGL_Index);
params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
params.count = regs.index_array.count;
params.index_buffer_offset =
buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
params.index_buffer_offset = index_buffer_offset;
params.base_vertex = static_cast<GLint>(regs.vb_element_base);
} else {
params.count = regs.vertex_buffer.count;
@@ -247,10 +300,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
BaseBindings base_bindings;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};

// Prepare packed bindings
bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
bind_ssbo_pushbuffer.Setup(base_bindings.gmem);

for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -271,18 +320,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {

GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu, stage);
const GLintptr offset =
const auto [buffer, offset] =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());

// Bind the emulation info buffer
bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset,
static_cast<GLsizeiptr>(sizeof(ubo)));
bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));

Shader shader{shader_cache.GetStageProgram(program)};

const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)};
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
SetupDrawConstBuffers(stage_enum, shader);
SetupGlobalRegions(stage_enum, shader);
SetupDrawGlobalMemory(stage_enum, shader);
const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};

const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
@@ -321,12 +369,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
base_bindings = next_bindings;
}

bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();

SyncClipEnabled(clip_distances);

gpu.dirty_flags.shaders = false;
gpu.dirty.shaders = false;
}

std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -409,13 +454,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(

const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
single_color_target};
if (fb_config_state == current_framebuffer_config_state &&
gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) {
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
// single color targets). This is done because the guest registers may not change but the
// host framebuffer may contain different attachments
return current_depth_stencil_usage;
}
gpu.dirty.render_settings = false;
current_framebuffer_config_state = fb_config_state;

texture_cache.GuardRenderTargets(true);
@@ -504,13 +549,71 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
}

void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
bool using_depth_fb, bool using_stencil_fb) {
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;

texture_cache.GuardRenderTargets(true);
View color_surface{};
if (using_color_fb) {
color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
}
View depth_surface{};
if (using_depth_fb || using_stencil_fb) {
depth_surface = texture_cache.GetDepthBufferSurface(false);
}
texture_cache.GuardRenderTargets(false);

current_state.draw.draw_framebuffer = clear_framebuffer.handle;
current_state.ApplyFramebufferState();

if (color_surface) {
color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
} else {
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
}

if (depth_surface) {
const auto& params = depth_surface->GetSurfaceParams();
switch (params.type) {
case VideoCore::Surface::SurfaceType::Depth: {
depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
break;
}
case VideoCore::Surface::SurfaceType::DepthStencil: {
depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
break;
}
default: { UNIMPLEMENTED(); }
}
} else {
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
}
}

void RasterizerOpenGL::Clear() {
const auto& regs = system.GPU().Maxwell3D().regs;
const auto& maxwell3d = system.GPU().Maxwell3D();

if (!maxwell3d.ShouldExecute()) {
return;
}

const auto& regs = maxwell3d.regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};

OpenGLState clear_state;
OpenGLState prev_state{OpenGLState::GetCurState()};
SCOPE_EXIT({
prev_state.AllDirty();
prev_state.Apply();
});

OpenGLState clear_state{OpenGLState::GetCurState()};
clear_state.SetDefaultViewports();
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
use_color = true;
@@ -530,6 +633,7 @@ void RasterizerOpenGL::Clear() {
// true.
clear_state.depth.test_enabled = true;
clear_state.depth.test_func = GL_ALWAYS;
clear_state.depth.write_mask = GL_TRUE;
}
if (regs.clear_buffers.S) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
@@ -566,8 +670,9 @@ void RasterizerOpenGL::Clear() {
return;
}

const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);

SyncViewport(clear_state);
if (regs.clear_flags.scissor) {
SyncScissorTest(clear_state);
}
@@ -576,21 +681,18 @@ void RasterizerOpenGL::Clear() {
clear_state.EmulateViewportWithScissor();
}

clear_state.ApplyColorMask();
clear_state.ApplyDepth();
clear_state.ApplyStencilTest();
clear_state.ApplyViewport();
clear_state.ApplyFramebufferState();
clear_state.AllDirty();
clear_state.Apply();

if (use_color) {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
glClearBufferfv(GL_COLOR, 0, regs.clear_color);
}

if (clear_depth && clear_stencil) {
if (use_depth && use_stencil) {
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
} else if (clear_depth) {
} else if (use_depth) {
glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
} else if (clear_stencil) {
} else if (use_stencil) {
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
}
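
The if-chain above maps one-to-one onto the three glClearBuffer entry points. A standalone sketch of the same calls with literal values (draw buffer 0 is the attachment wired up by ConfigureClearFramebuffer):

    const GLfloat color[4] = {0.0f, 0.0f, 0.0f, 1.0f};
    glClearBufferfv(GL_COLOR, 0, color);           // color clear on draw buffer 0
    glClearBufferfi(GL_DEPTH_STENCIL, 0, 1.0f, 0); // fused depth + stencil clear
    const GLint stencil = 0;
    glClearBufferiv(GL_STENCIL, 0, &stencil);      // stencil-only clear
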
@@ -601,6 +703,11 @@ void RasterizerOpenGL::DrawArrays() {

MICROPROFILE_SCOPE(OpenGL_Drawing);
auto& gpu = system.GPU().Maxwell3D();

if (!gpu.ShouldExecute()) {
return;
}

const auto& regs = gpu.regs;

SyncColorMask();
@@ -634,26 +741,47 @@ void RasterizerOpenGL::DrawArrays() {
Maxwell::MaxShaderStage;

// Add space for at least 18 constant buffers
buffer_size +=
Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
buffer_size += Maxwell::MaxConstBuffers *
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());

const bool invalidate = buffer_cache.Map(buffer_size);
if (invalidate) {
// As all cached buffers are invalidated, we need to recheck their state.
gpu.dirty_flags.vertex_array.set();
}
// Prepare the vertex array.
buffer_cache.Map(buffer_size);

// Prepare vertex array format.
const GLuint vao = SetupVertexFormat();
SetupVertexBuffer(vao);
vertex_array_pushbuffer.Setup(vao);

DrawParameters params = SetupDraw();
// Upload vertex and index data.
SetupVertexBuffer(vao);
SetupVertexInstances(vao);
const GLintptr index_buffer_offset = SetupIndexBuffer();

// Setup draw parameters. It will automatically choose what glDraw* method to use.
const DrawParameters params = SetupDraw(index_buffer_offset);

// Prepare packed bindings.
bind_ubo_pushbuffer.Setup(0);
bind_ssbo_pushbuffer.Setup(0);

// Setup shaders and their used resources.
texture_cache.GuardSamplers(true);
SetupShaders(params.primitive_mode);
texture_cache.GuardSamplers(false);

ConfigureFramebuffers(state);

buffer_cache.Unmap();
// Signal the buffer cache that we are not going to upload more things.
const bool invalidate = buffer_cache.Unmap();

// Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
vertex_array_pushbuffer.Bind();
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();

if (invalidate) {
// As all cached buffers are invalidated, we need to recheck their state.
gpu.dirty.ResetVertexArrays();
}

shader_program_manager->ApplyTo(state);
state.Apply();
@@ -665,6 +793,46 @@ void RasterizerOpenGL::DrawArrays() {
params.DispatchDraw();

accelerate_draw = AccelDraw::Disabled;
gpu.dirty.memory_general = false;
}
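
The Map/Unmap bracketing above is the core of the reordering: every Setup* call only records buffer ranges while the stream buffer is mapped, and the recorded ranges are handed to OpenGL in one go once Unmap has fixed the final handles. A minimal sketch of that record-then-flush idea; the type and names here are illustrative, not the project's actual push-buffer classes:

    #include <cstddef>
    #include <vector>
    // Assumes an OpenGL loader (e.g. glad) has been initialized.

    struct PendingRange {
        GLuint buffer;
        GLintptr offset;
        GLsizeiptr size;
    };

    class RangePushBuffer {
    public:
        void Push(GLuint buffer, GLintptr offset, GLsizeiptr size) {
            pending.push_back({buffer, offset, size}); // record only; no GL call yet
        }
        void Bind(GLenum target, GLuint first_index) {
            for (std::size_t i = 0; i < pending.size(); ++i) {
                const PendingRange& r = pending[i];
                glBindBufferRange(target, first_index + static_cast<GLuint>(i), r.buffer,
                                  r.offset, r.size);
            }
            pending.clear();
        }
    private:
        std::vector<PendingRange> pending;
    };
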

void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
if (!GLAD_GL_ARB_compute_variable_group_size) {
LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the "
"lack of GL_ARB_compute_variable_group_size");
return;
}

auto kernel = shader_cache.GetComputeKernel(code_addr);
const auto [program, next_bindings] = kernel->GetProgramHandle({});
state.draw.shader_program = program;
state.draw.program_pipeline = 0;

const std::size_t buffer_size =
Tegra::Engines::KeplerCompute::NumConstBuffers *
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
buffer_cache.Map(buffer_size);

bind_ubo_pushbuffer.Setup(0);
bind_ssbo_pushbuffer.Setup(0);

SetupComputeConstBuffers(kernel);
SetupComputeGlobalMemory(kernel);

// TODO(Rodrigo): Bind images and samplers

buffer_cache.Unmap();

bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();

state.ApplyShaderProgram();
state.ApplyProgramPipeline();

const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y,
launch_desc.grid_dim_z, launch_desc.block_dim_x,
launch_desc.block_dim_y, launch_desc.block_dim_z);
}
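
Unlike a plain glDispatchCompute, the GroupSizeARB variant takes the workgroup (block) dimensions as arguments, which is what lets the guest-supplied launch description drive the dispatch without baking sizes into the GLSL. A minimal usage sketch under the same extension guard (the literal values are placeholders):

    if (GLAD_GL_ARB_compute_variable_group_size) {
        // 8x8x1 groups over a 64x64x1 grid; the shader declares
        // 'layout (local_size_variable) in;' instead of fixed sizes.
        glDispatchComputeGroupSizeARB(64, 64, 1, 8, 8, 1);
    }
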

void RasterizerOpenGL::FlushAll() {}
@@ -675,7 +843,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
return;
}
texture_cache.FlushRegion(addr, size);
global_cache.FlushRegion(addr, size);
buffer_cache.FlushRegion(addr, size);
}

void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -685,7 +853,6 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
}
texture_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
global_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
}

@@ -696,6 +863,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
InvalidateRegion(addr, size);
}

void RasterizerOpenGL::TickFrame() {
buffer_cache.TickFrame();
}

bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
@@ -737,14 +908,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto stage_index = static_cast<std::size_t>(stage);
const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index];
const auto& entries = shader->GetShaderEntries().const_buffers;
const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
const auto& shader_stage = stages[static_cast<std::size_t>(stage)];
for (const auto& entry : shader->GetShaderEntries().const_buffers) {
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
SetupConstBuffer(buffer, entry);
}
}

// Upload only the enabled buffers from the 16 constbuffers of each shader stage
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry);
void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value();
Tegra::Engines::ConstBufferInfo buffer;
buffer.address = config.Address();
buffer.size = config.size;
buffer.enabled = mask[entry.GetIndex()];
SetupConstBuffer(buffer, entry);
}
}
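
The enable mask above is a packed bitfield with one bit per const buffer slot, so membership testing is a plain std::bitset index. A self-contained sketch of that decoding step, assuming the 8-slot mask width used above:

    #include <bitset>
    #include <cstddef>
    #include <cstdint>

    bool IsConstBufferEnabled(std::uint32_t mask_value, std::size_t index) {
        const std::bitset<8> mask(mask_value); // bit i <=> const buffer i is bound
        return mask[index];
    }
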

@@ -752,49 +934,52 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
const GLShader::ConstBufferEntry& entry) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
bind_ubo_pushbuffer.Push(0, 0, 0);
bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
return;
}

std::size_t size;
if (entry.IsIndirect()) {
// Buffer is accessed indirectly, so upload the entire thing
size = buffer.size;

if (size > MaxConstbufferSize) {
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
MaxConstbufferSize);
size = MaxConstbufferSize;
}
} else {
// Buffer is accessed directly, upload just what we use
size = entry.GetSize();
}

// Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
// UBO alignment requirements.
size = Common::AlignUp(size, sizeof(GLvec4));
ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big");
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));

const std::size_t alignment = device.GetUniformBufferAlignment();
const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment);
bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size);
const auto alignment = device.GetUniformBufferAlignment();
const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
bind_ubo_pushbuffer.Push(cbuf, offset, size);
}
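
Rounding the size up to sizeof(GLvec4) keeps every uploaded range a whole number of std140 vec4 slots. A worked sketch of that rounding, with the alignment helper reproduced inline so the example stands alone (illustrative; the project's Common::AlignUp may be implemented differently):

    #include <cstddef>

    constexpr std::size_t AlignUp(std::size_t value, std::size_t alignment) {
        return (value + alignment - 1) / alignment * alignment; // round up to a multiple
    }

    // A 37-byte directly-addressed buffer becomes 48 bytes, i.e. three 16-byte vec4 slots:
    static_assert(AlignUp(37, 16) == 48);
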

void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader) {
const auto& entries = shader->GetShaderEntries().global_memory_entries;
for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry{entries[bindpoint]};
const auto& region{global_cache.GetGlobalRegion(entry, stage)};
if (entry.IsWritten()) {
region->MarkAsModified(true, global_cache);
}
bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
static_cast<GLsizeiptr>(region->GetSizeInBytes()));
void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader) {
auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()};
const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
SetupGlobalMemory(entry, gpu_addr, size);
}
}

void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()};
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
SetupGlobalMemory(entry, gpu_addr, size);
}
}

void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry,
GPUVAddr gpu_addr, std::size_t size) {
const auto alignment{device.GetShaderStorageBufferAlignment()};
const auto [ssbo, buffer_offset] =
buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten());
bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
}

TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
@@ -883,10 +1068,11 @@ void RasterizerOpenGL::SyncClipCoef() {
}

void RasterizerOpenGL::SyncCullMode() {
const auto& regs = system.GPU().Maxwell3D().regs;
auto& maxwell3d = system.GPU().Maxwell3D();

const auto& regs = maxwell3d.regs;

state.cull.enabled = regs.cull.enabled != 0;

if (state.cull.enabled) {
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
@@ -919,16 +1105,21 @@ void RasterizerOpenGL::SyncDepthTestState() {
state.depth.test_enabled = regs.depth_test_enable != 0;
state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;

if (!state.depth.test_enabled)
if (!state.depth.test_enabled) {
return;
}

state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
}

void RasterizerOpenGL::SyncStencilTestState() {
const auto& regs = system.GPU().Maxwell3D().regs;
state.stencil.test_enabled = regs.stencil_enable != 0;
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.stencil_test) {
return;
}
const auto& regs = maxwell3d.regs;

state.stencil.test_enabled = regs.stencil_enable != 0;
if (!regs.stencil_enable) {
return;
}
@@ -957,10 +1148,17 @@ void RasterizerOpenGL::SyncStencilTestState() {
state.stencil.back.action_depth_fail = GL_KEEP;
state.stencil.back.action_depth_pass = GL_KEEP;
}
state.MarkDirtyStencilState();
maxwell3d.dirty.stencil_test = false;
}

void RasterizerOpenGL::SyncColorMask() {
const auto& regs = system.GPU().Maxwell3D().regs;
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.color_mask) {
return;
}
const auto& regs = maxwell3d.regs;

const std::size_t count =
regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
for (std::size_t i = 0; i < count; i++) {
@@ -971,6 +1169,9 @@ void RasterizerOpenGL::SyncColorMask() {
dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
}

state.MarkDirtyColorMask();
maxwell3d.dirty.color_mask = false;
}

void RasterizerOpenGL::SyncMultiSampleState() {
@@ -985,7 +1186,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {
}

void RasterizerOpenGL::SyncBlendState() {
const auto& regs = system.GPU().Maxwell3D().regs;
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.blend_state) {
return;
}
const auto& regs = maxwell3d.regs;

state.blend_color.red = regs.blend_color.r;
state.blend_color.green = regs.blend_color.g;
@@ -1008,6 +1213,8 @@ void RasterizerOpenGL::SyncBlendState() {
for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
state.blend[i].enabled = false;
}
maxwell3d.dirty.blend_state = false;
state.MarkDirtyBlendState();
return;
}

@@ -1024,6 +1231,9 @@ void RasterizerOpenGL::SyncBlendState() {
blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
}

state.MarkDirtyBlendState();
maxwell3d.dirty.blend_state = false;
}

void RasterizerOpenGL::SyncLogicOpState() {
@@ -1075,13 +1285,21 @@ void RasterizerOpenGL::SyncPointState() {
}

void RasterizerOpenGL::SyncPolygonOffset() {
const auto& regs = system.GPU().Maxwell3D().regs;
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.polygon_offset) {
return;
}
const auto& regs = maxwell3d.regs;

state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
state.polygon_offset.units = regs.polygon_offset_units;
state.polygon_offset.factor = regs.polygon_offset_factor;
state.polygon_offset.clamp = regs.polygon_offset_clamp;

state.MarkDirtyPolygonOffset();
maxwell3d.dirty.polygon_offset = false;
}

void RasterizerOpenGL::SyncAlphaTest() {

@@ -24,7 +24,6 @@
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_sampler_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -59,10 +58,12 @@ public:

void DrawArrays() override;
void Clear() override;
void DispatchCompute(GPUVAddr code_addr) override;
void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
@@ -73,11 +74,6 @@ public:
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;

/// Maximum supported size that a constbuffer can have in bytes.
static constexpr std::size_t MaxConstbufferSize = 0x10000;
static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");

private:
struct FramebufferConfigState {
bool using_color_fb{};
@@ -113,17 +109,30 @@ private:
OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true,
bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});

void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
bool using_depth_fb, bool using_stencil_fb);

/// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader);

/// Configures the current constbuffers to use for the kernel invocation.
void SetupComputeConstBuffers(const Shader& kernel);

/// Configures a constant buffer.
void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
const GLShader::ConstBufferEntry& entry);

/// Configures the current global memory entries to use for the draw command.
void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader);
void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader);

/// Configures the current global memory entries to use for the kernel invocation.
void SetupComputeGlobalMemory(const Shader& kernel);

/// Configures a constant buffer.
void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
std::size_t size);

/// Configures the current textures to use for the draw command. Returns shaders texture buffer
/// usage.
@@ -191,7 +200,6 @@ private:

TextureCacheOpenGL texture_cache;
ShaderCacheOpenGL shader_cache;
GlobalRegionCacheOpenGL global_cache;
SamplerCacheOpenGL sampler_cache;
FramebufferCacheOpenGL framebuffer_cache;

@@ -210,6 +218,7 @@ private:
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;

VertexArrayPushBuffer vertex_array_pushbuffer;
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};

@@ -221,14 +230,19 @@ private:
GLuint SetupVertexFormat();

void SetupVertexBuffer(GLuint vao);
void SetupVertexInstances(GLuint vao);

DrawParameters SetupDraw();
GLintptr SetupIndexBuffer();

DrawParameters SetupDraw(GLintptr index_buffer_offset);

void SetupShaders(GLenum primitive_mode);

enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;

OGLFramebuffer clear_framebuffer;

using CachedPageMap = boost::icl::interval_map<u64, int>;
CachedPageMap cached_pages;
};

@@ -23,13 +23,13 @@ namespace OpenGL {

using VideoCommon::Shader::ProgramCode;

// One UBO is always reserved for emulation values
constexpr u32 RESERVED_UBOS = 1;
// One UBO is always reserved for emulation values on staged shaders
constexpr u32 STAGE_RESERVED_UBOS = 1;

struct UnspecializedShader {
std::string code;
GLShader::ShaderEntries entries;
Maxwell::ShaderProgram program_type;
ProgramType program_type;
};

namespace {
@@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
}

/// Gets the shader type from a Maxwell program type
constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
constexpr GLenum GetShaderType(ProgramType program_type) {
switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
case ProgramType::VertexA:
case ProgramType::VertexB:
return GL_VERTEX_SHADER;
case Maxwell::ShaderProgram::Geometry:
case ProgramType::Geometry:
return GL_GEOMETRY_SHADER;
case Maxwell::ShaderProgram::Fragment:
case ProgramType::Fragment:
return GL_FRAGMENT_SHADER;
case ProgramType::Compute:
return GL_COMPUTE_SHADER;
default:
return GL_NONE;
}
@@ -100,6 +102,25 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen
}
}

ProgramType GetProgramType(Maxwell::ShaderProgram program) {
switch (program) {
case Maxwell::ShaderProgram::VertexA:
return ProgramType::VertexA;
case Maxwell::ShaderProgram::VertexB:
return ProgramType::VertexB;
case Maxwell::ShaderProgram::TesselationControl:
return ProgramType::TessellationControl;
case Maxwell::ShaderProgram::TesselationEval:
return ProgramType::TessellationEval;
case Maxwell::ShaderProgram::Geometry:
return ProgramType::Geometry;
case Maxwell::ShaderProgram::Fragment:
return ProgramType::Fragment;
}
UNREACHABLE();
return {};
}

/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
constexpr std::size_t start_offset = 10;
@@ -128,13 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
}

/// Hashes one (or two) program streams
u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {
if (size_a == 0) {
size_a = CalculateProgramSize(code);
}
u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a);
if (program_type != Maxwell::ShaderProgram::VertexA) {
if (program_type != ProgramType::VertexA) {
return unique_identifier;
}
// VertexA programs include two programs
@@ -152,12 +173,12 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
}
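
The identifier above hashes the primary stream with CityHash64 and, for VertexA pairs, mixes in a hash of the second stream; the exact combining step lives outside this hunk. A sketch of one common way to fold two 64-bit hashes into a single key (illustrative only, not necessarily the combiner used here):

    #include <cstdint>

    std::uint64_t CombineHashes(std::uint64_t a, std::uint64_t b) {
        // boost::hash_combine-style mixing, widened to 64 bits
        return a ^ (b + 0x9e3779b97f4a7c15ULL + (a << 6) + (a >> 2));
    }
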

/// Creates an unspecialized program from code streams
GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type,
GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
ProgramCode program_code, ProgramCode program_code_b) {
GLShader::ShaderSetup setup(program_code);
setup.program.size_a = CalculateProgramSize(program_code);
setup.program.size_b = 0;
if (program_type == Maxwell::ShaderProgram::VertexA) {
if (program_type == ProgramType::VertexA) {
// VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
// Conventional HW does not support this, so we combine VertexA and VertexB into one
// stage here.
@@ -168,30 +189,41 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr
program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);

switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
case ProgramType::VertexA:
case ProgramType::VertexB:
return GLShader::GenerateVertexShader(device, setup);
case Maxwell::ShaderProgram::Geometry:
case ProgramType::Geometry:
return GLShader::GenerateGeometryShader(device, setup);
case Maxwell::ShaderProgram::Fragment:
case ProgramType::Fragment:
return GLShader::GenerateFragmentShader(device, setup);
case ProgramType::Compute:
return GLShader::GenerateComputeShader(device, setup);
default:
LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
UNREACHABLE();
UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
return {};
}
}

CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
Maxwell::ShaderProgram program_type, const ProgramVariant& variant,
ProgramType program_type, const ProgramVariant& variant,
bool hint_retrievable = false) {
auto base_bindings{variant.base_bindings};
const auto primitive_mode{variant.primitive_mode};
const auto texture_buffer_usage{variant.texture_buffer_usage};

std::string source = "#version 430 core\n"
"#extension GL_ARB_separate_shader_objects : enable\n\n";
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
"#extension GL_ARB_separate_shader_objects : enable\n";
if (entries.shader_viewport_layer_array) {
source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
}
if (program_type == ProgramType::Compute) {
source += "#extension GL_ARB_compute_variable_group_size : require\n";
}
source += '\n';

if (program_type != ProgramType::Compute) {
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
}

for (const auto& cbuf : entries.const_buffers) {
source +=
@@ -218,13 +250,16 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i);
}

if (program_type == Maxwell::ShaderProgram::Geometry) {
if (program_type == ProgramType::Geometry) {
const auto [glsl_topology, debug_name, max_vertices] =
GetPrimitiveDescription(primitive_mode);

source += "layout (" + std::string(glsl_topology) + ") in;\n";
source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
}
if (program_type == ProgramType::Compute) {
source += "layout (local_size_variable) in;\n";
}

source += code;
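
For a compute kernel, the preamble assembled above therefore comes out roughly as follows before the decompiled body is appended (illustrative; the cbuf and sampler defines depend on the shader's entries and are omitted here):

    #version 430 core
    #extension GL_ARB_separate_shader_objects : enable
    #extension GL_ARB_compute_variable_group_size : require

    layout (local_size_variable) in;

Note that EMULATION_UBO_BINDING is deliberately absent: the emulation UBO is only reserved for the graphics stages.
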

@@ -252,7 +287,7 @@ std::set<GLenum> GetSupportedFormats() {

} // Anonymous namespace

CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type,
CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
GLShader::ProgramResult result)
: RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr},
unique_identifier{params.unique_identifier}, program_type{program_type},
@@ -265,29 +300,50 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
ProgramCode&& program_code_b) {
const auto code_size{CalculateProgramSize(program_code)};
const auto code_size_b{CalculateProgramSize(program_code_b)};
auto result{CreateProgram(params.device, program_type, program_code, program_code_b)};
auto result{
CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
if (result.first.empty()) {
// TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
return {};
}

params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
params.unique_identifier, program_type, static_cast<u32>(code_size / sizeof(u64)),
static_cast<u32>(code_size_b / sizeof(u64)), std::move(program_code),
std::move(program_code_b)));
params.unique_identifier, GetProgramType(program_type),
static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)),
std::move(program_code), std::move(program_code_b)));

return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result)));
return std::shared_ptr<CachedShader>(
new CachedShader(params, GetProgramType(program_type), std::move(result)));
}

Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
Maxwell::ShaderProgram program_type,
GLShader::ProgramResult result) {
return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result)));
return std::shared_ptr<CachedShader>(
new CachedShader(params, GetProgramType(program_type), std::move(result)));
}

Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};

const auto code_size{CalculateProgramSize(code)};
params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
static_cast<u32>(code_size / sizeof(u64)), 0,
std::move(code), {}));

return std::shared_ptr<CachedShader>(
new CachedShader(params, ProgramType::Compute, std::move(result)));
}

Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params,
GLShader::ProgramResult result) {
return std::shared_ptr<CachedShader>(
new CachedShader(params, ProgramType::Compute, std::move(result)));
}

std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
GLuint handle{};
if (program_type == Maxwell::ShaderProgram::Geometry) {
if (program_type == ProgramType::Geometry) {
handle = GetGeometryShader(variant);
} else {
const auto [entry, is_cache_miss] = programs.try_emplace(variant);
@@ -305,8 +361,11 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar
handle = program->handle;
}

auto base_bindings{variant.base_bindings};
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
auto base_bindings = variant.base_bindings;
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size());
if (program_type != ProgramType::Compute) {
base_bindings.cbuf += STAGE_RESERVED_UBOS;
}
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
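
As a worked example of the advancement above: a fragment-stage variant with three const buffers, one global memory entry, and two samplers moves cbuf forward by 3 + 1 = 4 (the extra slot being STAGE_RESERVED_UBOS, skipped only for compute), gmem by 1, and sampler by 2, so the next stage's bindings start past everything this stage consumed.
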

@@ -569,7 +628,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
}

Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
if (!system.GPU().Maxwell3D().dirty_flags.shaders) {
if (!system.GPU().Maxwell3D().dirty.shaders) {
return last_shaders[static_cast<std::size_t>(program)];
}

@@ -586,13 +645,15 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
// No shader found - create a new one
ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
ProgramCode program_code_b;
if (program == Maxwell::ShaderProgram::VertexA) {
const bool is_program_a{program == Maxwell::ShaderProgram::VertexA};
if (is_program_a) {
const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
program_code_b = GetShaderCode(memory_manager, program_addr_b,
memory_manager.GetPointer(program_addr_b));
}

const auto unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
const auto unique_identifier =
GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
host_ptr, unique_identifier};
@@ -609,4 +670,30 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}

Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto& memory_manager{system.GPU().MemoryManager()};
const auto host_ptr{memory_manager.GetPointer(code_addr)};
auto kernel = TryGet(host_ptr);
if (kernel) {
return kernel;
}

// No kernel found - create a new one
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
host_ptr, unique_identifier};

const auto found = precompiled_shaders.find(unique_identifier);
if (found == precompiled_shaders.end()) {
kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
} else {
kernel = CachedShader::CreateKernelFromCache(params, found->second);
}

Register(kernel);
return kernel;
}

} // namespace OpenGL

@@ -61,6 +61,11 @@ public:
Maxwell::ShaderProgram program_type,
GLShader::ProgramResult result);

static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code);

static Shader CreateKernelFromCache(const ShaderParameters& params,
GLShader::ProgramResult result);

VAddr GetCpuAddr() const override {
return cpu_addr;
}
@@ -78,7 +83,7 @@ public:
std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);

private:
explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type,
explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
GLShader::ProgramResult result);

// Geometry programs. These are needed because GLSL needs an input topology but it's not
@@ -104,7 +109,7 @@ private:
u8* host_ptr{};
VAddr cpu_addr{};
u64 unique_identifier{};
Maxwell::ShaderProgram program_type{};
ProgramType program_type{};
ShaderDiskCacheOpenGL& disk_cache;
const PrecompiledPrograms& precompiled_programs;

@@ -132,6 +137,9 @@ public:
/// Gets the current specified shader stage program
Shader GetStageProgram(Maxwell::ShaderProgram program);

/// Gets a compute kernel in the passed address
Shader GetComputeKernel(GPUVAddr code_addr);

protected:
// We do not have to flush this cache as things in it are never modified by us.
void FlushObjectInner(const Shader& object) override {}

@@ -14,6 +14,7 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -36,7 +37,6 @@ using namespace std::string_literals;
using namespace VideoCommon::Shader;

using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
using Operation = const OperationNode&;

enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
@@ -46,7 +46,7 @@ using TextureArgument = std::pair<Type, Node>;
using TextureIR = std::variant<TextureAoffi, TextureArgument>;

constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));

class ShaderWriter {
public:
@@ -161,9 +161,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}

constexpr bool IsVertexShader(ProgramType stage) {
return stage == ProgramType::VertexA || stage == ProgramType::VertexB;
}

class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage,
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage,
std::string suffix)
: device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}

@@ -246,24 +250,22 @@ public:
usage.is_read, usage.is_written);
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_viewport_layer_array =
IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex());
entries.shader_length = ir.GetLength();
return entries;
}

private:
using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
using OperationDecompilersArray =
std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;

void DeclareVertex() {
if (stage != ShaderStage::Vertex)
if (!IsVertexShader(stage))
return;

DeclareVertexRedeclarations();
}

void DeclareGeometry() {
if (stage != ShaderStage::Geometry) {
if (stage != ProgramType::Geometry) {
return;
}

@@ -282,22 +284,35 @@ private:
}

void DeclareVertexRedeclarations() {
bool clip_distances_declared = false;

code.AddLine("out gl_PerVertex {{");
++code.scope;

code.AddLine("vec4 gl_Position;");

for (const auto o : ir.GetOutputAttributes()) {
if (o == Attribute::Index::PointSize)
code.AddLine("float gl_PointSize;");
if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
o == Attribute::Index::ClipDistances4567)) {
for (const auto attribute : ir.GetOutputAttributes()) {
if (attribute == Attribute::Index::ClipDistances0123 ||
attribute == Attribute::Index::ClipDistances4567) {
code.AddLine("float gl_ClipDistance[];");
clip_distances_declared = true;
break;
}
}
if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) {
if (ir.UsesLayer()) {
code.AddLine("int gl_Layer;");
}
if (ir.UsesViewportIndex()) {
code.AddLine("int gl_ViewportIndex;");
}
} else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) &&
!device.HasVertexViewportLayer()) {
LOG_ERROR(
Render_OpenGL,
"GL_ARB_shader_viewport_layer_array is not available and its required by a shader");
}

if (ir.UsesPointSize()) {
code.AddLine("float gl_PointSize;");
}

--code.scope;
code.AddLine("}};");
@@ -325,11 +340,16 @@ private:
}

void DeclareLocalMemory() {
if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) {
const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
code.AddNewLine();
// TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
// specialization time.
const u64 local_memory_size =
stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
if (local_memory_size == 0) {
return;
}
const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
code.AddNewLine();
}
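
Local memory is emulated as a float array addressed in 4-byte units, so the 0x400-byte compute stub above yields AlignUp(0x400, 4) / 4 = 256 elements, declared as a 'float' array of 256 entries (the array's name depends on the shader suffix returned by GetLocalMemory).
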

void DeclareInternalFlags() {
@@ -383,12 +403,12 @@ private:
const u32 location{GetGenericAttributeIndex(index)};

std::string name{GetInputAttribute(index)};
if (stage == ShaderStage::Geometry) {
if (stage == ProgramType::Geometry) {
name = "gs_" + name + "[]";
}

std::string suffix;
if (stage == ShaderStage::Fragment) {
if (stage == ProgramType::Fragment) {
const auto input_mode{header.ps.GetAttributeUse(location)};
if (skip_unused && input_mode == AttributeUse::Unused) {
return;
@@ -400,7 +420,7 @@ private:
}

void DeclareOutputAttributes() {
if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) {
if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) {
for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
DeclareOutputAttribute(ToGenericAttribute(i));
}
@@ -522,7 +542,7 @@ private:
constexpr u32 element_stride{4};
const u32 address{generic_base + index * generic_stride + element * element_stride};

const bool declared{stage != ShaderStage::Fragment ||
const bool declared{stage != ProgramType::Fragment ||
header.ps.GetAttributeUse(index) != AttributeUse::Unused};
const std::string value{declared ? ReadAttribute(attribute, element) : "0"};
code.AddLine("case 0x{:x}: return {};", address, value);
@@ -626,7 +646,7 @@ private:
}

if (const auto abuf = std::get_if<AbufNode>(&*node)) {
UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry,
UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry,
"Physical attributes in geometry shaders are not implemented");
if (abuf->IsPhysicalBuffer()) {
return fmt::format("readPhysicalAttribute(ftou({}))",
@@ -681,6 +701,9 @@ private:
}

if (const auto lmem = std::get_if<LmemNode>(&*node)) {
if (stage == ProgramType::Compute) {
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
}
return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
}

@@ -710,7 +733,7 @@ private:

std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
const auto GeometryPass = [&](std::string_view name) {
if (stage == ShaderStage::Geometry && buffer) {
if (stage == ProgramType::Geometry && buffer) {
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
// set an 0x80000000 index for those and the shader fails to build. Find out why
// this happens and what's its intent.
@@ -722,10 +745,10 @@ private:
switch (attribute) {
case Attribute::Index::Position:
switch (stage) {
case ShaderStage::Geometry:
case ProgramType::Geometry:
return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),
GetSwizzle(element));
case ShaderStage::Fragment:
case ProgramType::Fragment:
return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
default:
UNREACHABLE();
@@ -746,7 +769,7 @@ private:
// TODO(Subv): Find out what the values are for the first two elements when inside a
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
// shader.
ASSERT(stage == ShaderStage::Vertex);
ASSERT(IsVertexShader(stage));
switch (element) {
case 2:
// Config pack's first value is instance_id.
@@ -758,7 +781,7 @@ private:
return "0";
case Attribute::Index::FrontFacing:
// TODO(Subv): Find out what the values are for the other elements.
ASSERT(stage == ShaderStage::Fragment);
ASSERT(stage == ProgramType::Fragment);
switch (element) {
case 3:
return "itof(gl_FrontFacing ? -1 : 0)";
@@ -780,7 +803,7 @@ private:
return value;
}
// There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
const std::string precise = stage != ShaderStage::Fragment ? "precise " : "";
const std::string precise = stage != ProgramType::Fragment ? "precise " : "";

const std::string temporary = code.GenerateTemporary();
code.AddLine("{}float {} = {};", precise, temporary, value);
@@ -805,6 +828,45 @@ private:
return CastOperand(VisitOperand(operation, operand_index), type);
}

std::optional<std::pair<std::string, bool>> GetOutputAttribute(const AbufNode* abuf) {
switch (const auto attribute = abuf->GetIndex()) {
case Attribute::Index::Position:
return std::make_pair("gl_Position"s + GetSwizzle(abuf->GetElement()), false);
case Attribute::Index::LayerViewportPointSize:
switch (abuf->GetElement()) {
case 0:
UNIMPLEMENTED();
return {};
case 1:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
return std::make_pair("gl_Layer", true);
case 2:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
return std::make_pair("gl_ViewportIndex", true);
case 3:
UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader");
return std::make_pair("gl_PointSize", false);
}
return {};
case Attribute::Index::ClipDistances0123:
return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), false);
case Attribute::Index::ClipDistances4567:
return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4),
false);
default:
if (IsGenericAttribute(attribute)) {
return std::make_pair(
GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), false);
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
return {};
}
}
|
||||
|
||||
std::string CastOperand(const std::string& value, Type type) const {
|
||||
switch (type) {
|
||||
case Type::Bool:
|
||||
@@ -1001,6 +1063,8 @@ private:
|
||||
const Node& src = operation[1];
|
||||
|
||||
std::string target;
|
||||
bool is_integer = false;
|
||||
|
||||
if (const auto gpr = std::get_if<GprNode>(&*dest)) {
|
||||
if (gpr->GetIndex() == Register::ZeroIndex) {
|
||||
// Writing to Register::ZeroIndex is a no op
|
||||
@@ -1009,27 +1073,16 @@ private:
|
||||
target = GetRegister(gpr->GetIndex());
|
||||
} else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
|
||||
UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
|
||||
|
||||
target = [&]() -> std::string {
|
||||
switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) {
|
||||
case Attribute::Index::Position:
|
||||
return "gl_Position"s + GetSwizzle(abuf->GetElement());
|
||||
case Attribute::Index::PointSize:
|
||||
return "gl_PointSize";
|
||||
case Attribute::Index::ClipDistances0123:
|
||||
return fmt::format("gl_ClipDistance[{}]", abuf->GetElement());
|
||||
case Attribute::Index::ClipDistances4567:
|
||||
return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4);
|
||||
default:
|
||||
if (IsGenericAttribute(attribute)) {
|
||||
return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement());
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
|
||||
static_cast<u32>(attribute));
|
||||
return "0";
|
||||
}
|
||||
}();
|
||||
const auto result = GetOutputAttribute(abuf);
|
||||
if (!result) {
|
||||
return {};
|
||||
}
|
||||
target = result->first;
|
||||
is_integer = result->second;
|
||||
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
|
||||
if (stage == ProgramType::Compute) {
|
||||
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
|
||||
}
|
||||
target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
|
||||
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
||||
const std::string real = Visit(gmem->GetRealAddress());
|
||||
@@ -1040,7 +1093,11 @@ private:
|
||||
UNREACHABLE_MSG("Assign called without a proper target");
|
||||
}
|
||||
|
||||
code.AddLine("{} = {};", target, Visit(src));
|
||||
if (is_integer) {
|
||||
code.AddLine("{} = ftoi({});", target, Visit(src));
|
||||
} else {
|
||||
code.AddLine("{} = {};", target, Visit(src));
|
||||
}
|
||||
return {};
|
||||
}
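The boolean half of GetOutputAttribute's return value drives this branch: gl_Layer and gl_ViewportIndex are integer built-ins in GLSL, while the decompiler carries every value as a float bit pattern, so integer targets go through ftoi. An illustration of the two emitted forms (the gpr names are placeholders, and ftoi is assumed to be a floatBitsToInt-style wrapper from the shader prelude, which this diff does not show):

    // Generated GLSL, illustrative only:
    //   gl_Layer      = ftoi(gpr12);   // integer built-in: reinterpret the bits
    //   gl_Position.x = gpr13;         // float built-in: direct copy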

@@ -1353,14 +1410,10 @@ private:
return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
}

std::string LogicalAll2(Operation operation) {
std::string LogicalAnd2(Operation operation) {
return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
}

std::string LogicalAny2(Operation operation) {
return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
}

template <bool with_nan>
std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
@@ -1583,7 +1636,7 @@ private:
}

std::string Exit(Operation operation) {
if (stage != ShaderStage::Fragment) {
if (stage != ProgramType::Fragment) {
code.AddLine("return;");
return {};
}
@@ -1634,7 +1687,7 @@ private:
}

std::string EmitVertex(Operation operation) {
ASSERT_MSG(stage == ShaderStage::Geometry,
ASSERT_MSG(stage == ProgramType::Geometry,
"EmitVertex is expected to be used in a geometry shader.");

// If a geometry shader is attached, it will always flip (it's the last stage before
@@ -1645,7 +1698,7 @@ private:
}

std::string EndPrimitive(Operation operation) {
ASSERT_MSG(stage == ShaderStage::Geometry,
ASSERT_MSG(stage == ProgramType::Geometry,
"EndPrimitive is expected to be used in a geometry shader.");

code.AddLine("EndPrimitive();");
@@ -1667,7 +1720,7 @@ private:
return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
}

static constexpr OperationDecompilersArray operation_decompilers = {
static constexpr std::array operation_decompilers = {
&GLSLDecompiler::Assign,

&GLSLDecompiler::Select,
@@ -1751,8 +1804,7 @@ private:
&GLSLDecompiler::LogicalXor,
&GLSLDecompiler::LogicalNegate,
&GLSLDecompiler::LogicalPick2,
&GLSLDecompiler::LogicalAll2,
&GLSLDecompiler::LogicalAny2,
&GLSLDecompiler::LogicalAnd2,

&GLSLDecompiler::LogicalLessThan<Type::Float>,
&GLSLDecompiler::LogicalEqual<Type::Float>,
@@ -1816,6 +1868,7 @@ private:
&GLSLDecompiler::WorkGroupId<1>,
&GLSLDecompiler::WorkGroupId<2>,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
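Replacing the OperationDecompilersArray alias with a deduced std::array is what makes the new static_assert possible: the array's size is deduced from the initializer list and then checked against the OperationCode enum at compile time. The pattern in isolation, with placeholder names standing in for the real decompiler:

    #include <array>
    #include <cstddef>

    enum class OperationCode { Assign, Select, Amount };
    struct Decompiler {
        void Assign() {}
        void Select() {}
    };

    // C++17 CTAD deduces std::array<void (Decompiler::*)(), 2> from the list...
    static constexpr std::array operation_decompilers = {
        &Decompiler::Assign,
        &Decompiler::Select,
    };
    // ...so adding an OperationCode without a handler now fails to compile.
    static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));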

std::string GetRegister(u32 index) const {
return GetDeclarationWithSuffix(index, "gpr");
@@ -1880,7 +1933,7 @@ private:
}

u32 GetNumPhysicalInputAttributes() const {
return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
}

u32 GetNumPhysicalAttributes() const {
@@ -1893,7 +1946,7 @@ private:

const Device& device;
const ShaderIR& ir;
const ShaderStage stage;
const ProgramType stage;
const std::string suffix;
const Header header;

@@ -1924,7 +1977,7 @@ std::string GetCommonDeclarations() {
MAX_CONSTBUFFER_ELEMENTS);
}

ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage,
ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
const std::string& suffix) {
GLSLDecompiler decompiler(device, ir, stage, suffix);
decompiler.Decompile();

@@ -12,14 +12,26 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/shader/shader_ir.h"

namespace OpenGL {
class Device;
}

namespace VideoCommon::Shader {
class ShaderIR;
}

namespace OpenGL {

class Device;

enum class ProgramType : u32 {
VertexA = 0,
VertexB = 1,
TessellationControl = 2,
TessellationEval = 3,
Geometry = 4,
Fragment = 5,
Compute = 6
};
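ProgramType replaces both Maxwell::ShaderProgram and Maxwell::ShaderStage on the OpenGL side, so a single equality test no longer identifies vertex work: either half of a dual program counts. A plausible shape for the IsVertexShader helper the rest of this diff calls (its actual definition is not shown here):

    constexpr bool IsVertexShader(ProgramType stage) {
        return stage == ProgramType::VertexA || stage == ProgramType::VertexB;
    }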

} // namespace OpenGL

namespace OpenGL::GLShader {

struct ShaderEntries;
@@ -78,12 +90,13 @@ struct ShaderEntries {
std::vector<ImageEntry> images;
std::vector<GlobalMemoryEntry> global_memory_entries;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
bool shader_viewport_layer_array{};
std::size_t shader_length{};
};

std::string GetCommonDeclarations();

ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
Maxwell::ShaderStage stage, const std::string& suffix);
ProgramType stage, const std::string& suffix);

} // namespace OpenGL::GLShader

@@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {

} // namespace

ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
u32 program_code_size, u32 program_code_size_b,
ProgramCode program_code, ProgramCode program_code_b)
: unique_identifier{unique_identifier}, program_type{program_type},
@@ -373,6 +373,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
}
}

bool shader_viewport_layer_array{};
if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) {
return {};
}
entry.entries.shader_viewport_layer_array = shader_viewport_layer_array;

u64 shader_length{};
if (!LoadObjectFromPrecompiled(shader_length)) {
return {};
@@ -445,6 +451,10 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
}
}

if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) {
return false;
}

if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
return false;
}

@@ -18,7 +18,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/file_sys/vfs_vector.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"

namespace Core {
@@ -34,14 +33,11 @@ namespace OpenGL {
struct ShaderDiskCacheUsage;
struct ShaderDiskCacheDump;

using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;

using ProgramCode = std::vector<u64>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;

using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
using TextureBufferUsage = std::bitset<64>;

/// Allocated bindings used by an OpenGL shader program.
/// Allocated bindings used by an OpenGL shader program
struct BaseBindings {
u32 cbuf{};
u32 gmem{};
@@ -126,7 +122,7 @@ namespace OpenGL {
/// Describes how a shader is used by the guest GPU
class ShaderDiskCacheRaw {
public:
explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
u32 program_code_size, u32 program_code_size_b,
ProgramCode program_code, ProgramCode program_code_b);
ShaderDiskCacheRaw();
@@ -141,30 +137,13 @@ public:
}

bool HasProgramA() const {
return program_type == Maxwell::ShaderProgram::VertexA;
return program_type == ProgramType::VertexA;
}

Maxwell::ShaderProgram GetProgramType() const {
ProgramType GetProgramType() const {
return program_type;
}

Maxwell::ShaderStage GetProgramStage() const {
switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
return Maxwell::ShaderStage::Vertex;
case Maxwell::ShaderProgram::TesselationControl:
return Maxwell::ShaderStage::TesselationControl;
case Maxwell::ShaderProgram::TesselationEval:
return Maxwell::ShaderStage::TesselationEval;
case Maxwell::ShaderProgram::Geometry:
return Maxwell::ShaderStage::Geometry;
case Maxwell::ShaderProgram::Fragment:
return Maxwell::ShaderStage::Fragment;
}
UNREACHABLE();
}

const ProgramCode& GetProgramCode() const {
return program_code;
}
@@ -175,7 +154,7 @@ public:

private:
u64 unique_identifier{};
Maxwell::ShaderProgram program_type{};
ProgramType program_type{};
u32 program_code_size{};
u32 program_code_size_b{};

@@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::ShaderIR;

static constexpr u32 PROGRAM_OFFSET{10};
static constexpr u32 PROGRAM_OFFSET = 10;
static constexpr u32 COMPUTE_OFFSET = 0;

ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
@@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
};

)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");

const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
ProgramResult program = Decompile(device, program_ir, stage, "vertex");
out += program.first;

if (setup.IsDualProgram()) {
const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b);
ProgramResult program_b =
Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");

ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
out += program_b.first;
}

@@ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
};

)";

const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
out += program.first;

out += R"(
@@ -116,9 +115,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {

)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");

ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
out += program.first;

out += R"(
@@ -130,4 +127,22 @@ void main() {
return {std::move(out), std::move(program.second)};
}

ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);

std::string out = "// Shader Unique Id: CS" + id + "\n\n";
out += GetCommonDeclarations();

const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a);
ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
out += program.first;

out += R"(
void main() {
execute_compute();
}
)";
return {std::move(out), std::move(program.second)};
}
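Graphics programs begin with a header block that instruction decoding skips, which is what PROGRAM_OFFSET encodes; compute kernels have no such header, so the new COMPUTE_OFFSET starts decoding at word zero. A short illustration of how the offset feeds ShaderIR (the signature is taken from the calls above; the header-size rationale is an assumption, not stated in this diff):

    // ShaderIR(code, main_offset, size): instruction decoding starts at code[main_offset].
    const ShaderIR graphics_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); // skip the program header
    const ShaderIR compute_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a);  // kernels start at 0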

} // namespace OpenGL::GLShader

@@ -54,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se
/// Generates the GLSL fragment shader program source code for the given FS program
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);

/// Generates the GLSL compute shader program source code for the given CS program
ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup);

} // namespace OpenGL::GLShader

@@ -10,21 +10,25 @@

namespace OpenGL::GLShader {

GLuint LoadShader(const char* source, GLenum type) {
const char* debug_type;
namespace {
const char* GetStageDebugName(GLenum type) {
switch (type) {
case GL_VERTEX_SHADER:
debug_type = "vertex";
break;
return "vertex";
case GL_GEOMETRY_SHADER:
debug_type = "geometry";
break;
return "geometry";
case GL_FRAGMENT_SHADER:
debug_type = "fragment";
break;
default:
UNREACHABLE();
return "fragment";
case GL_COMPUTE_SHADER:
return "compute";
}
UNIMPLEMENTED();
return "unknown";
}
} // Anonymous namespace

GLuint LoadShader(const char* source, GLenum type) {
const char* debug_type = GetStageDebugName(type);
const GLuint shader_id = glCreateShader(type);
glShaderSource(shader_id, 1, &source, nullptr);
LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);

@@ -6,8 +6,11 @@
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_state.h"

MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));

namespace OpenGL {

using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -162,6 +165,25 @@ OpenGLState::OpenGLState() {
alpha_test.ref = 0.0f;
}

void OpenGLState::SetDefaultViewports() {
for (auto& item : viewports) {
item.x = 0;
item.y = 0;
item.width = 0;
item.height = 0;
item.depth_range_near = 0.0f;
item.depth_range_far = 1.0f;
item.scissor.enabled = false;
item.scissor.x = 0;
item.scissor.y = 0;
item.scissor.width = 0;
item.scissor.height = 0;
}

depth_clamp.far_plane = false;
depth_clamp.near_plane = false;
}

void OpenGLState::ApplyDefaultState() {
glEnable(GL_BLEND);
glDisable(GL_FRAMEBUFFER_SRGB);
@@ -523,7 +545,8 @@ void OpenGLState::ApplySamplers() const {
}
}

void OpenGLState::Apply() const {
void OpenGLState::Apply() {
MICROPROFILE_SCOPE(OpenGL_State);
ApplyFramebufferState();
ApplyVertexArrayState();
ApplyShaderProgram();
@@ -532,19 +555,31 @@ void OpenGLState::Apply() const {
ApplyPointSize();
ApplyFragmentColorClamp();
ApplyMultisample();
if (dirty.color_mask) {
ApplyColorMask();
dirty.color_mask = false;
}
ApplyDepthClamp();
ApplyColorMask();
ApplyViewport();
ApplyStencilTest();
if (dirty.stencil_state) {
ApplyStencilTest();
dirty.stencil_state = false;
}
ApplySRgb();
ApplyCulling();
ApplyDepth();
ApplyPrimitiveRestart();
ApplyBlending();
if (dirty.blend_state) {
ApplyBlending();
dirty.blend_state = false;
}
ApplyLogicOp();
ApplyTextures();
ApplySamplers();
ApplyPolygonOffset();
if (dirty.polygon_offset) {
ApplyPolygonOffset();
dirty.polygon_offset = false;
}
ApplyAlphaTest();
}
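Apply() drops its const qualifier because it now consumes the dirty bits it checks: a state group is re-applied only if a MarkDirty*() call flagged it since the last Apply(). Callers that mutate GL state behind the tracker's back therefore pessimize first, as the call sites later in this diff do; a typical usage sketch:

    OpenGLState state;
    state.draw.read_framebuffer = src_framebuffer.handle;
    state.AllDirty();   // state was built outside the tracker, force every group to re-apply
    state.Apply();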

@@ -195,8 +195,9 @@ public:
s_rgb_used = false;
}

void SetDefaultViewports();
/// Apply this state as the current OpenGL state
void Apply() const;
void Apply();

void ApplyFramebufferState() const;
void ApplyVertexArrayState() const;
@@ -237,11 +238,41 @@ public:
/// Viewport does not affect glClearBuffer, so emulate viewport using the scissor test
void EmulateViewportWithScissor();

void MarkDirtyBlendState() {
dirty.blend_state = true;
}

void MarkDirtyStencilState() {
dirty.stencil_state = true;
}

void MarkDirtyPolygonOffset() {
dirty.polygon_offset = true;
}

void MarkDirtyColorMask() {
dirty.color_mask = true;
}

void AllDirty() {
dirty.blend_state = true;
dirty.stencil_state = true;
dirty.polygon_offset = true;
dirty.color_mask = true;
}

private:
static OpenGLState cur_state;

// Workaround for sRGB problems caused by QT not supporting sRGB output
static bool s_rgb_used;
struct {
bool blend_state;
bool stencil_state;
bool viewport_state;
bool polygon_offset;
bool color_mask;
} dirty{};
};

} // namespace OpenGL

@@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType;

MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
MP_RGB(128, 192, 128));

namespace {

@@ -483,11 +485,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const auto& dst_params{dst_view->GetSurfaceParams()};

OpenGLState prev_state{OpenGLState::GetCurState()};
SCOPE_EXIT({ prev_state.Apply(); });
SCOPE_EXIT({
prev_state.AllDirty();
prev_state.Apply();
});

OpenGLState state;
state.draw.read_framebuffer = src_framebuffer.handle;
state.draw.draw_framebuffer = dst_framebuffer.handle;
state.AllDirty();
state.Apply();

u32 buffers{};
@@ -535,6 +541,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
}

void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
const auto& src_params = src_surface->GetSurfaceParams();
const auto& dst_params = dst_surface->GetSurfaceParams();
UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);

@@ -101,7 +101,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst

RendererOpenGL::~RendererOpenGL() = default;

/// Swap buffers (render frame)
void RendererOpenGL::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {

@@ -109,6 +108,7 @@ void RendererOpenGL::SwapBuffers(

// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
state.AllDirty();
state.Apply();

if (framebuffer) {
@@ -130,6 +130,8 @@ void RendererOpenGL::SwapBuffers(

DrawScreen(render_window.GetFramebufferLayout());

rasterizer->TickFrame();

render_window.SwapBuffers();
}

@@ -139,6 +141,7 @@ void RendererOpenGL::SwapBuffers(
system.GetPerfStats().BeginSystemFrame();

// Restore the rasterizer state
prev_state.AllDirty();
prev_state.Apply();
}

@@ -205,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() {
// Link shaders and get variable locations
shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
state.draw.shader_program = shader.handle;
state.AllDirty();
state.Apply();
uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
@@ -262,7 +266,6 @@ void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
// Initialize sRGB Usage
OpenGLState::ClearsRGBUsed();
rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
}
@@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
// Workaround brightness problems in SMO by enabling sRGB in the final output
// if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
state.AllDirty();
state.Apply();
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Restore default state
state.framebuffer_srgb.enabled = false;
state.texture_units[0].texture = 0;
state.AllDirty();
state.Apply();
// Clear sRGB state for the next frame
OpenGLState::ClearsRGBUsed();
@@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() {
GLuint old_read_fb = state.draw.read_framebuffer;
GLuint old_draw_fb = state.draw.draw_framebuffer;
state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
state.AllDirty();
state.Apply();

Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
@@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() {
screenshot_framebuffer.Release();
state.draw.read_framebuffer = old_read_fb;
state.draw.draw_framebuffer = old_draw_fb;
state.AllDirty();
state.Apply();
glDeleteRenderbuffers(1, &renderbuffer);

@@ -13,29 +13,67 @@

namespace OpenGL {

VertexArrayPushBuffer::VertexArrayPushBuffer() = default;

VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;

void VertexArrayPushBuffer::Setup(GLuint vao_) {
vao = vao_;
index_buffer = nullptr;
vertex_buffers.clear();
}

void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) {
index_buffer = buffer;
}

void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer,
GLintptr offset, GLsizei stride) {
vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride});
}

void VertexArrayPushBuffer::Bind() {
if (index_buffer) {
glVertexArrayElementBuffer(vao, *index_buffer);
}

// TODO(Rodrigo): Find a way to ARB_multi_bind this
for (const auto& entry : vertex_buffers) {
glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset,
entry.stride);
}
}

BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}

BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;

void BindBuffersRangePushBuffer::Setup(GLuint first_) {
first = first_;
buffers.clear();
buffer_pointers.clear();
offsets.clear();
sizes.clear();
}

void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) {
buffers.push_back(buffer);
void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) {
buffer_pointers.push_back(buffer);
offsets.push_back(offset);
sizes.push_back(size);
}

void BindBuffersRangePushBuffer::Bind() const {
const std::size_t count{buffers.size()};
void BindBuffersRangePushBuffer::Bind() {
// Ensure sizes are valid.
const std::size_t count{buffer_pointers.size()};
DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
if (count == 0) {
return;
}

// Dereference buffers.
buffers.resize(count);
std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(),
[](const GLuint* pointer) { return *pointer; });

glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
sizes.data());
}
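Both push buffers now take a pointer to the handle rather than the handle itself and only dereference it inside Bind(). The usual motivation for this pattern (not spelled out in the diff) is that the GLuint behind the pointer may still be created or replaced between queuing and binding; reading it late always picks up the final value. A usage sketch with illustrative names:

    BindBuffersRangePushBuffer ubos{GL_UNIFORM_BUFFER};
    ubos.Setup(0);
    ubos.Push(&staging.handle, offset, size);  // handle may not be generated yet
    // ... staging.handle is (re)created here ...
    ubos.Bind();                               // dereferences &staging.handle only now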

@@ -11,20 +11,49 @@

namespace OpenGL {

class BindBuffersRangePushBuffer {
class VertexArrayPushBuffer final {
public:
BindBuffersRangePushBuffer(GLenum target);
explicit VertexArrayPushBuffer();
~VertexArrayPushBuffer();

void Setup(GLuint vao_);

void SetIndexBuffer(const GLuint* buffer);

void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset,
GLsizei stride);

void Bind();

private:
struct Entry {
GLuint binding_index{};
const GLuint* buffer{};
GLintptr offset{};
GLsizei stride{};
};

GLuint vao{};
const GLuint* index_buffer{};
std::vector<Entry> vertex_buffers;
};

class BindBuffersRangePushBuffer final {
public:
explicit BindBuffersRangePushBuffer(GLenum target);
~BindBuffersRangePushBuffer();

void Setup(GLuint first_);

void Push(GLuint buffer, GLintptr offset, GLsizeiptr size);
void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size);

void Bind() const;
void Bind();

private:
GLenum target;
GLuint first;
GLenum target{};
GLuint first{};
std::vector<const GLuint*> buffer_pointers;

std::vector<GLuint> buffers;
std::vector<GLintptr> offsets;
std::vector<GLsizeiptr> sizes;

@@ -205,10 +205,6 @@ public:
}

private:
using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
using OperationDecompilersArray =
std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;

static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);

void AllocateBindings() {
@@ -430,20 +426,17 @@ private:
instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input,
t_in_uint, "instance_index");

bool is_point_size_declared = false;
bool is_clip_distances_declared = false;
for (const auto index : ir.GetOutputAttributes()) {
if (index == Attribute::Index::PointSize) {
is_point_size_declared = true;
} else if (index == Attribute::Index::ClipDistances0123 ||
index == Attribute::Index::ClipDistances4567) {
if (index == Attribute::Index::ClipDistances0123 ||
index == Attribute::Index::ClipDistances4567) {
is_clip_distances_declared = true;
}
}

std::vector<Id> members;
members.push_back(t_float4);
if (is_point_size_declared) {
if (ir.UsesPointSize()) {
members.push_back(t_float);
}
if (is_clip_distances_declared) {
@@ -466,7 +459,7 @@ private:

position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true);
point_size_index =
MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared);
MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", ir.UsesPointSize());
clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances",
is_clip_distances_declared);

@@ -712,7 +705,8 @@ private:
case Attribute::Index::Position:
return AccessElement(t_out_float, per_vertex, position_index,
abuf->GetElement());
case Attribute::Index::PointSize:
case Attribute::Index::LayerViewportPointSize:
UNIMPLEMENTED_IF(abuf->GetElement() != 3);
return AccessElement(t_out_float, per_vertex, point_size_index);
case Attribute::Index::ClipDistances0123:
return AccessElement(t_out_float, per_vertex, clip_distances_index,
@@ -806,12 +800,7 @@ private:
return {};
}

Id LogicalAll2(Operation operation) {
UNIMPLEMENTED();
return {};
}

Id LogicalAny2(Operation operation) {
Id LogicalAnd2(Operation operation) {
UNIMPLEMENTED();
return {};
}
@@ -1208,7 +1197,7 @@ private:
return {};
}

static constexpr OperationDecompilersArray operation_decompilers = {
static constexpr std::array operation_decompilers = {
&SPIRVDecompiler::Assign,

&SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
@@ -1293,8 +1282,7 @@ private:
&SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
&SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
&SPIRVDecompiler::LogicalPick2,
&SPIRVDecompiler::LogicalAll2,
&SPIRVDecompiler::LogicalAny2,
&SPIRVDecompiler::LogicalAnd2,

&SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
&SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
@@ -1359,6 +1347,7 @@ private:
&SPIRVDecompiler::WorkGroupId<1>,
&SPIRVDecompiler::WorkGroupId<2>,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));

const VKDevice& device;
const ShaderIR& ir;

@@ -46,12 +46,12 @@ void ShaderIR::Decode() {
coverage_end = shader_info.end;
if (shader_info.decompilable) {
disable_flow_stack = true;
const auto insert_block = ([this](NodeBlock& nodes, u32 label) {
const auto insert_block = [this](NodeBlock& nodes, u32 label) {
if (label == exit_branch) {
return;
}
basic_blocks.insert({label, nodes});
});
};
const auto& blocks = shader_info.blocks;
NodeBlock current_block;
u32 current_label = exit_branch;
@@ -103,7 +103,7 @@ void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
}

void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
const auto apply_conditions = ([&](const Condition& cond, Node n) -> Node {
const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
Node result = n;
if (cond.cc != ConditionCode::T) {
result = Conditional(GetConditionCode(cond.cc), {result});
@@ -117,7 +117,7 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
result = Conditional(GetPredicate(pred, is_neg), {result});
}
return result;
});
};
if (block.branch.address < 0) {
if (block.branch.kills) {
Node n = Operation(OperationCode::Discard);

@@ -23,38 +23,51 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);

Node op_b = [&]() {
switch (opcode->get().GetId()) {
case OpCode::Id::HSETP2_R:
return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
instr.hsetp2.negate_b);
default:
UNREACHABLE();
return Immediate(0);
}
}();
op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b);

// We can't use the constant predicate as destination.
ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));

const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);
Tegra::Shader::PredCondition cond{};
bool h_and{};
Node op_b{};
switch (opcode->get().GetId()) {
case OpCode::Id::HSETP2_C:
cond = instr.hsetp2.cbuf_and_imm.cond;
h_and = instr.hsetp2.cbuf_and_imm.h_and;
op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
break;
case OpCode::Id::HSETP2_IMM:
cond = instr.hsetp2.cbuf_and_imm.cond;
h_and = instr.hsetp2.cbuf_and_imm.h_and;
op_b = UnpackHalfImmediate(instr, true);
break;
case OpCode::Id::HSETP2_R:
cond = instr.hsetp2.reg.cond;
h_and = instr.hsetp2.reg.h_and;
op_b =
UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b,
instr.hsetp2.reg.negate_b),
instr.hsetp2.reg.type_b);
break;
default:
UNREACHABLE();
op_b = Immediate(0);
}

const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
const OperationCode pair_combiner =
instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);

const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
const Node first_pred = Operation(pair_combiner, comparison);
const auto Write = [&](u64 dest, Node src) {
SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39));
};

// Set the primary predicate to the result of Predicate OP SecondPredicate
const Node value = Operation(combiner, first_pred, second_pred);
SetPredicate(bb, instr.hsetp2.pred3, value);

if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
// Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred);
SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred));
const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
const u64 first = instr.hsetp2.pred0;
const u64 second = instr.hsetp2.pred3;
if (h_and) {
const Node joined = Operation(OperationCode::LogicalAnd2, comparison);
Write(first, joined);
Write(second, Operation(OperationCode::LogicalNegate, joined));
} else {
Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u)));
Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u)));
}

return pc;
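HSETP2 compares both fp16 lanes of a packed pair at once, so the comparison yields a bool2. With .H_AND the lanes are folded into a single predicate and its negation; without it, LogicalPick2 routes lane 0 and lane 1 into the two destination predicates. A reduced, runnable model of the write-back (the combiner is shown as a plain logical AND for illustration; the real combiner comes from GetPredicateCombiner):

    #include <utility>

    // Returns {pred0, pred3} given the per-lane comparison results.
    std::pair<bool, bool> hsetp2_writeback(bool lane0, bool lane1, bool pred39, bool h_and) {
        if (h_and) {
            const bool joined = lane0 && lane1;            // LogicalAnd2
            return {joined && pred39, !joined && pred39};  // second gets the negation
        }
        return {lane0 && pred39, lane1 && pred39};         // LogicalPick2 on each lane
    }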

@@ -95,12 +95,8 @@ const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::Image
const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
Tegra::Shader::ImageType type) {
const Node image_register{GetRegister(reg)};
const Node base_image{
const auto [base_image, cbuf_index, cbuf_offset]{
TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
const auto cbuf{std::get_if<CbufNode>(&*base_image)};
const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())};
const auto cbuf_offset{cbuf_offset_imm->GetValue()};
const auto cbuf_index{cbuf->GetIndex()};
const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};

// If this image has already been used, return the existing mapping.

@@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Node op_b =
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);

SetTemporal(bb, 0, op_a);
SetTemporal(bb, 1, op_b);
SetRegister(bb, instr.gpr0, GetTemporal(0));
SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
SetTemporary(bb, 0, op_a);
SetTemporary(bb, 1, op_b);
SetRegister(bb, instr.gpr0, GetTemporary(0));
SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
break;
}
default:
@@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
}();
for (u32 i = 0; i < count; ++i)
SetTemporal(bb, i, GetLmem(i * 4));
SetTemporary(bb, i, GetLmem(i * 4));
for (u32 i = 0; i < count; ++i)
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
break;
}
default:
@@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);

SetTemporal(bb, i, gmem);
SetTemporary(bb, i, gmem);
}
for (u32 i = 0; i < count; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
break;
}
@@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
TrackAndGetGlobalMemory(bb, instr, true);

// Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
SetTemporal(bb, 0, real_address_base);
SetTemporary(bb, 0, real_address_base);

const u32 count = GetUniformTypeElementsCount(type);
for (u32 i = 0; i < count; ++i) {
SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
}
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
@@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);

bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1)));
}
break;
}
@@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB
const auto addr_register{GetRegister(instr.gmem.gpr)};
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};

const Node base_address{
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
const auto cbuf = std::get_if<CbufNode>(&*base_address);
ASSERT(cbuf != nullptr);
const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
ASSERT(cbuf_offset_imm != nullptr);
const auto cbuf_offset = cbuf_offset_imm->GetValue();
const auto [base_address, index, offset] =
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
ASSERT(base_address != nullptr);

bb.push_back(
Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));

const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
const GlobalMemoryBase descriptor{index, offset};
const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
auto& usage = entry->second;
if (is_write) {

@@ -102,7 +102,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
PRECISE, op_a, Immediate(3));
const Node operand =
Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
branch = Operation(OperationCode::BranchIndirect, convert);
branch = Operation(OperationCode::BranchIndirect, operand);
}

const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;

@@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta,
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
SetTemporal(bb, indexer++, value);
SetTemporary(bb, indexer++, value);
}
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
break;
}
@@ -238,10 +238,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
auto params = coords;
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporal(bb, indexer++, value);
SetTemporary(bb, indexer++, value);
}
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
break;
}
@@ -308,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
bool is_array, bool is_shadow) {
const Node sampler_register = GetRegister(reg);
const Node base_sampler =
const auto [base_sampler, cbuf_index, cbuf_offset] =
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
const auto cbuf = std::get_if<CbufNode>(&*base_sampler);
const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
ASSERT(cbuf_offset_imm != nullptr);
const auto cbuf_offset = cbuf_offset_imm->GetValue();
const auto cbuf_index = cbuf->GetIndex();
ASSERT(base_sampler != nullptr);
const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);

// If this sampler has already been used, return the existing mapping.
@@ -340,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
// Skip disabled components
continue;
}
SetTemporal(bb, dest_elem++, components[elem]);
SetTemporary(bb, dest_elem++, components[elem]);
}
// After writing values in temporaries, move them to the real registers
for (u32 i = 0; i < dest_elem; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
}

@@ -357,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component))
continue;
SetTemporal(bb, dest_elem++, components[component]);
SetTemporary(bb, dest_elem++, components[component]);
}

for (u32 i = 0; i < dest_elem; ++i) {
if (i < 2) {
// Write the first two swizzle components to gpr0 and gpr0+1
SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
} else {
ASSERT(instr.texs.HasTwoDestinations());
// Write the rest of the swizzle components to gpr28 and gpr28+1
SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
}
}
}
@@ -395,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
return;
}

SetTemporal(bb, 0, first_value);
SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
SetTemporary(bb, 0, first_value);
SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));

SetRegister(bb, instr.gpr0, GetTemporal(0));
SetRegister(bb, instr.gpr28, GetTemporal(1));
SetRegister(bb, instr.gpr0, GetTemporary(0));
SetRegister(bb, instr.gpr28, GetTemporary(1));
}

Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,

@@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
if (is_psl) {
product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
}
SetTemporal(bb, 0, product);
product = GetTemporal(0);
SetTemporary(bb, 0, product);
product = GetTemporary(0);

const Node original_c = op_c;
const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
@@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
}
}();

SetTemporal(bb, 1, op_c);
op_c = GetTemporal(1);
SetTemporary(bb, 1, op_c);
op_c = GetTemporary(1);

// TODO(Rodrigo): Use an appropriate sign for this operation
Node sum = Operation(OperationCode::IAdd, product, op_c);
SetTemporal(bb, 2, sum);
sum = GetTemporal(2);
SetTemporary(bb, 2, sum);
sum = GetTemporary(2);
if (is_merge) {
const Node a = BitfieldExtract(sum, 0, 16);
const Node b =

@@ -101,8 +101,7 @@ enum class OperationCode {
LogicalXor, /// (bool a, bool b) -> bool
LogicalNegate, /// (bool a) -> bool
LogicalPick2, /// (bool2 pair, uint index) -> bool
LogicalAll2, /// (bool2 a) -> bool
LogicalAny2, /// (bool2 a) -> bool
LogicalAnd2, /// (bool2 a) -> bool

LogicalFLessThan, /// (float a, float b) -> bool
LogicalFEqual, /// (float a, float b) -> bool

@@ -12,7 +12,7 @@
namespace VideoCommon::Shader {

Node Conditional(Node condition, std::vector<Node> code) {
return MakeNode<ConditionalNode>(condition, std::move(code));
return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
}

Node Comment(std::string text) {

@@ -61,8 +61,17 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
const auto [entry, is_new] = used_cbufs.try_emplace(index);
entry->second.MarkAsUsedIndirect();

const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset));
return MakeNode<CbufNode>(index, final_offset);
Node final_offset = [&] {
// Attempt to inline constant buffer without a variable offset. This is done to allow
// tracking LDC calls.
if (const auto gpr = std::get_if<GprNode>(&*node)) {
if (gpr->GetIndex() == Register::ZeroIndex) {
return Immediate(offset);
}
}
return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
}();
return MakeNode<CbufNode>(index, std::move(final_offset));
}
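TrackCbuf now returns the tracked node together with the constant-buffer index and offset, and GetConstBufferIndirect folds the variable part of an LDC address away when it is the zero register, which always reads as zero on the hardware; that leaves a compile-time constant offset the tracking passes can follow. The fold in isolation, with added commentary:

    // If the variable operand is the zero register, cbuf[index][gpr + imm]
    // degenerates to cbuf[index][imm], so emit an immediate offset instead
    // of a UAdd the tracker could not resolve.
    if (const auto gpr = std::get_if<GprNode>(&*node)) {
        if (gpr->GetIndex() == Register::ZeroIndex) {
            return Immediate(offset);  // statically known constant-buffer offset
        }
    }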

Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
@@ -80,7 +89,7 @@ Node ShaderIR::GetPredicate(bool immediate) {

Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
used_input_attributes.emplace(index);
return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer);
return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
}

Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
@@ -89,6 +98,22 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres
}

Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
if (index == Attribute::Index::LayerViewportPointSize) {
switch (element) {
case 0:
UNIMPLEMENTED();
break;
case 1:
uses_layer = true;
break;
case 2:
uses_viewport_index = true;
break;
case 3:
uses_point_size = true;
break;
}
}
if (index == Attribute::Index::ClipDistances0123 ||
index == Attribute::Index::ClipDistances4567) {
const auto clip_index =
@@ -97,7 +122,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
}
used_output_attributes.insert(index);

return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer);
return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
}

Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
@@ -109,19 +134,19 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
}

Node ShaderIR::GetLocalMemory(Node address) {
return MakeNode<LmemNode>(address);
return MakeNode<LmemNode>(std::move(address));
}

Node ShaderIR::GetTemporal(u32 id) {
Node ShaderIR::GetTemporary(u32 id) {
return GetRegister(Register::ZeroIndex + 1 + id);
}

Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
if (absolute) {
value = Operation(OperationCode::FAbsolute, NO_PRECISE, value);
value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value));
}
if (negate) {
value = Operation(OperationCode::FNegate, NO_PRECISE, value);
value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value));
}
return value;
}
@@ -130,24 +155,26 @@ Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
if (!saturate) {
return value;
}
const Node positive_zero = Immediate(std::copysignf(0, 1));
const Node positive_one = Immediate(1.0f);
return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one);

Node positive_zero = Immediate(std::copysignf(0, 1));
Node positive_one = Immediate(1.0f);
return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
std::move(positive_one));
}

Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) {
Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
switch (size) {
case Register::Size::Byte:
value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
Immediate(24));
value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
Immediate(24));
value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
std::move(value), Immediate(24));
value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
std::move(value), Immediate(24));
return value;
case Register::Size::Short:
value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
Immediate(16));
value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
Immediate(16));
value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
std::move(value), Immediate(16));
value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
std::move(value), Immediate(16));
case Register::Size::Word:
// Default - do nothing
return value;
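ConvertIntegerSize narrows a 32-bit register read to its logical width by shifting the payload to the top of the word and back down; SignedOperation selects an arithmetic right shift for signed reads (sign extension) and a logical one otherwise (zero extension). The Byte case in plain C++, assuming two's-complement arithmetic shifts as on the targets in question:

    #include <cstdint>

    uint32_t raw = 0x000000F5;                          // low byte 0xF5 encodes -11 as int8
    int32_t  sign_extended = int32_t(raw << 24) >> 24;  // arithmetic shift -> 0xFFFFFFF5 == -11
    uint32_t zero_extended = (raw << 24) >> 24;         // logical shift    -> 0x000000F5 == 245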
@@ -163,27 +190,29 @@ Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, b
return value;
}
if (absolute) {
value = Operation(OperationCode::IAbsolute, NO_PRECISE, value);
value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
}
if (negate) {
value = Operation(OperationCode::INegate, NO_PRECISE, value);
value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
}
return value;
}

Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
const Node value = Immediate(instr.half_imm.PackImmediates());
Node value = Immediate(instr.half_imm.PackImmediates());
if (!has_negation) {
return value;
}
const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);

return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate);
Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);

return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate),
std::move(second_negate));
}

Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
return Operation(OperationCode::HUnpack, type, value);
return Operation(OperationCode::HUnpack, type, std::move(value));
}

Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
@@ -191,11 +220,11 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
case Tegra::Shader::HalfMerge::H0_H1:
return src;
case Tegra::Shader::HalfMerge::F32:
return Operation(OperationCode::HMergeF32, src);
return Operation(OperationCode::HMergeF32, std::move(src));
case Tegra::Shader::HalfMerge::Mrg_H0:
return Operation(OperationCode::HMergeH0, dest, src);
return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
case Tegra::Shader::HalfMerge::Mrg_H1:
return Operation(OperationCode::HMergeH1, dest, src);
return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
}
UNREACHABLE();
return src;
@@ -203,10 +232,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {

Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
if (absolute) {
value = Operation(OperationCode::HAbsolute, NO_PRECISE, value);
value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
}
if (negate) {
value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true),
value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true),
GetPredicate(true));
}
return value;
@@ -216,9 +245,11 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
if (!saturate) {
return value;
}
const Node positive_zero = Immediate(std::copysignf(0, 1));
const Node positive_one = Immediate(1.0f);
return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one);

Node positive_zero = Immediate(std::copysignf(0, 1));
Node positive_one = Immediate(1.0f);
return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
std::move(positive_one));
}

Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
@@ -246,7 +277,6 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
condition == PredCondition::LessEqualWithNan ||
condition == PredCondition::GreaterThanWithNan ||
condition == PredCondition::GreaterEqualWithNan) {

predicate = Operation(OperationCode::LogicalOr, predicate,
Operation(OperationCode::LogicalFIsNan, op_a));
predicate = Operation(OperationCode::LogicalOr, predicate,
@@ -275,7 +305,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
"Unknown predicate comparison operation");

Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b);
Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
std::move(op_b));

UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
condition == PredCondition::NotEqualWithNan ||
|
||||
@@ -305,9 +336,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition
|
||||
UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
|
||||
"Unknown predicate comparison operation");
|
||||
|
||||
const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
|
||||
|
||||
return predicate;
|
||||
return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
|
||||
}
|
||||
|
||||
OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
|
||||
@@ -333,31 +362,32 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
|
||||
}
|
||||
|
||||
void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
|
||||
bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src));
|
||||
bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src)));
|
||||
}
|
||||
|
||||
void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
|
||||
bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src));
|
||||
bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src)));
|
||||
}
|
||||
|
||||
void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
|
||||
bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value));
|
||||
bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value)));
|
||||
}
|
||||
|
||||
void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
|
||||
bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value));
|
||||
bb.push_back(
|
||||
Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
|
||||
}
|
||||
|
||||
void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) {
|
||||
SetRegister(bb, Register::ZeroIndex + 1 + id, value);
|
||||
void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
|
||||
SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
|
||||
}
|
||||
|
||||
void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
|
||||
if (!sets_cc) {
|
||||
return;
|
||||
}
|
||||
const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f));
|
||||
SetInternalFlag(bb, InternalFlag::Zero, zerop);
|
||||
Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f));
|
||||
SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
|
||||
LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
|
||||
}
|
||||
|
||||
@@ -365,14 +395,14 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_
|
||||
if (!sets_cc) {
|
||||
return;
|
||||
}
|
||||
const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0));
|
||||
SetInternalFlag(bb, InternalFlag::Zero, zerop);
|
||||
Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
|
||||
SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
|
||||
LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
|
||||
}
|
||||
|
||||
Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
|
||||
return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset),
|
||||
Immediate(bits));
|
||||
return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
|
||||
Immediate(offset), Immediate(bits));
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
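Note on the pattern above: every hunk in this file swaps a plain copy of a Node argument for std::move. Assuming Node is an alias for std::shared_ptr<NodeData> (as video_core/shader/node.h declares), each copy costs an atomic reference-count increment and a later decrement, while a move only transfers the pointer. A minimal sketch of the difference; the Consume helper is illustrative, not yuzu API:

#include <memory>
#include <utility>

struct NodeData {};
using Node = std::shared_ptr<NodeData>; // assumed alias, mirroring node.h

// Illustrative helper that takes its operand by value, like Operation() above.
Node Consume(Node value) {
    return value; // the by-value parameter is implicitly moved into the result
}

int main() {
    Node value = std::make_shared<NodeData>();
    value = Consume(value);            // copy: atomic increment, then decrement
    value = Consume(std::move(value)); // move: no reference-count traffic
    return 0;
}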
@@ -5,13 +5,10 @@
#pragma once

#include <array>
#include <cstring>
#include <map>
#include <optional>
#include <set>
#include <string>
#include <tuple>
#include <variant>
#include <vector>

#include "common/common_types.h"
@@ -115,6 +112,18 @@ public:
        return static_cast<std::size_t>(coverage_end * sizeof(u64));
    }

    bool UsesLayer() const {
        return uses_layer;
    }

    bool UsesViewportIndex() const {
        return uses_viewport_index;
    }

    bool UsesPointSize() const {
        return uses_point_size;
    }

    bool HasPhysicalAttributes() const {
        return uses_physical_attributes;
    }
@@ -198,8 +207,8 @@ private:
    Node GetInternalFlag(InternalFlag flag, bool negated = false);
    /// Generates a node representing a local memory address
    Node GetLocalMemory(Node address);
    /// Generates a temporal, internally it uses a post-RZ register
    Node GetTemporal(u32 id);
    /// Generates a temporary, internally it uses a post-RZ register
    Node GetTemporary(u32 id);

    /// Sets a register. src value must be a number-evaluated node.
    void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
@@ -209,8 +218,8 @@ private:
    void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
    /// Sets a local memory address. address and value must be a number-evaluated node
    void SetLocalMemory(NodeBlock& bb, Node address, Node value);
    /// Sets a temporal. Internally it uses a post-RZ register
    void SetTemporal(NodeBlock& bb, u32 id, Node value);
    /// Sets a temporary. Internally it uses a post-RZ register
    void SetTemporary(NodeBlock& bb, u32 id, Node value);

    /// Sets internal flags from a float
    void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
@@ -316,7 +325,7 @@ private:
    void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
                              Node op_c, Node imm_lut, bool sets_cc);

    Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
    std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;

    std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;

@@ -346,6 +355,9 @@ private:
    std::set<Image> used_images;
    std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
    std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
    bool uses_layer{};
    bool uses_viewport_index{};
    bool uses_point_size{};
    bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes

    Tegra::Shader::Header header;
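The new Uses* getters are plain observers over the uses_* flags added in the private section. A hedged sketch of how a backend could consume them; ShaderIRLike and QueryOutputs are stand-ins for illustration, not yuzu API:

// Stand-in mirroring the getter surface declared above.
struct ShaderIRLike {
    bool uses_layer = false;
    bool uses_viewport_index = false;
    bool uses_point_size = false;

    bool UsesLayer() const { return uses_layer; }
    bool UsesViewportIndex() const { return uses_viewport_index; }
    bool UsesPointSize() const { return uses_point_size; }
};

struct DeclaredOutputs {
    bool layer;
    bool viewport_index;
    bool point_size;
};

// A code generator could query these once and declare only the built-ins it needs.
DeclaredOutputs QueryOutputs(const ShaderIRLike& ir) {
    return {ir.UsesLayer(), ir.UsesViewportIndex(), ir.UsesPointSize()};
}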
@@ -15,56 +15,63 @@ namespace {
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
                                   OperationCode operation_code) {
    for (; cursor >= 0; --cursor) {
        const Node node = code.at(cursor);
        Node node = code.at(cursor);

        if (const auto operation = std::get_if<OperationNode>(&*node)) {
            if (operation->GetCode() == operation_code) {
                return {node, cursor};
                return {std::move(node), cursor};
            }
        }

        if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
            const auto& conditional_code = conditional->GetCode();
            const auto [found, internal_cursor] = FindOperation(
            auto [found, internal_cursor] = FindOperation(
                conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
            if (found) {
                return {found, cursor};
                return {std::move(found), cursor};
            }
        }
    }
    return {};
}
} // namespace
} // Anonymous namespace

Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const {
std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
                                               s64 cursor) const {
    if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
        // Cbuf found, but it has to be immediate
        return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
        // Constant buffer found, test if it's an immediate
        const auto offset = cbuf->GetOffset();
        if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
            return {tracked, cbuf->GetIndex(), immediate->GetValue()};
        }
        return {};
    }
    if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
            return nullptr;
            return {};
        }
        // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
        // register that it uses as an operand
        const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
        if (!source) {
            return nullptr;
            return {};
        }
        return TrackCbuf(source, code, new_cursor);
    }
    if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
        for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
            if (const auto found = TrackCbuf((*operation)[i], code, cursor)) {
                // Cbuf found in operand
            if (auto found = TrackCbuf((*operation)[i], code, cursor); std::get<0>(found)) {
                // Cbuf found in operand.
                return found;
            }
        }
        return nullptr;
        return {};
    }
    if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
        const auto& conditional_code = conditional->GetCode();
        return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
    }
    return nullptr;
    return {};
}

std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
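The reworked TrackCbuf reports the constant buffer index and immediate offset alongside the tracked node, and signals failure with a value-initialized tuple (return {}) instead of nullptr. A self-contained sketch of the calling pattern; FindConstBuffer and its names are hypothetical, not yuzu API:

#include <memory>
#include <tuple>

struct NodeData {};
using Node = std::shared_ptr<NodeData>; // assumed alias, mirroring node.h
using u32 = unsigned int;               // stand-in for common/common_types.h

// Hypothetical tracker with the same contract as TrackCbuf above: on success it
// yields {node, constant buffer index, immediate offset}; on failure it returns a
// value-initialized tuple, so the Node element is null.
std::tuple<Node, u32, u32> FindConstBuffer(bool succeed) {
    if (!succeed) {
        return {};
    }
    return {std::make_shared<NodeData>(), 3, 0x40};
}

int main() {
    const auto [node, index, offset] = FindConstBuffer(true);
    if (node) {
        // Found: e.g. record that a sampler handle lives in cbuf `index` at `offset`.
    }
    return 0;
}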
@@ -75,9 +75,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs)

    // Linear Surface check
    if (!params.is_tiled) {
        if (std::tie(params.width, params.height, params.pitch) ==
            std::tie(rhs.width, rhs.height, rhs.pitch)) {
            return MatchStructureResult::FullMatch;
        if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
            if (params.width == rhs.width) {
                return MatchStructureResult::FullMatch;
            } else {
                return MatchStructureResult::SemiMatch;
            }
        }
        return MatchStructureResult::None;
    }
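Restated compactly (enum values and field names taken from the hunk; the free function is illustrative): pitch and height must match exactly for a linear surface to be reusable, and a mismatched width now degrades to SemiMatch instead of rejecting the candidate outright.

using u32 = unsigned int; // stand-in for common/common_types.h

enum class MatchStructureResult { FullMatch, SemiMatch, None };

// Illustrative condensation of the linear-surface branch above.
MatchStructureResult MatchLinear(u32 width, u32 height, u32 pitch,
                                 u32 rhs_width, u32 rhs_height, u32 rhs_pitch) {
    if (height == rhs_height && pitch == rhs_pitch) {
        return width == rhs_width ? MatchStructureResult::FullMatch
                                  : MatchStructureResult::SemiMatch;
    }
    return MatchStructureResult::None;
}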
@@ -200,8 +200,9 @@ public:
        modification_tick = tick;
    }

    void MarkAsRenderTarget(const bool is_target) {
    void MarkAsRenderTarget(const bool is_target, const u32 index) {
        this->is_target = is_target;
        this->index = index;
    }

    void MarkAsPicked(const bool is_picked) {
@@ -221,6 +222,10 @@ public:
        return is_target;
    }

    u32 GetRenderTarget() const {
        return index;
    }

    bool IsRegistered() const {
        return is_registered;
    }
@@ -307,10 +312,13 @@ private:
        return view;
    }

    static constexpr u32 NO_RT = 0xFFFFFFFF;

    bool is_modified{};
    bool is_target{};
    bool is_registered{};
    bool is_picked{};
    u32 index{NO_RT};
    u64 modification_tick{};
};
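A surface now remembers which render target slot it is bound to, defaulting to the NO_RT sentinel until bound. A trimmed sketch of the pattern; the class name is illustrative:

#include <cstdint>

// Illustrative class; fields and methods mirror the hunk above.
class SurfaceSlot {
public:
    static constexpr std::uint32_t NO_RT = 0xFFFFFFFF;

    void MarkAsRenderTarget(bool target, std::uint32_t rt_index) {
        is_target = target;
        index = rt_index;
    }

    std::uint32_t GetRenderTarget() const {
        return index;
    }

private:
    bool is_target{};
    std::uint32_t index{NO_RT}; // sentinel until the surface is bound to a slot
};

int main() {
    SurfaceSlot surface;
    surface.MarkAsRenderTarget(true, 2);                    // bound as color target 2
    surface.MarkAsRenderTarget(false, SurfaceSlot::NO_RT);  // unbound again
    return 0;
}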
@@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co

std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
                                                    bool uncompressed) const {
    const bool tiled{as_host_size ? false : is_tiled};
    const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
    const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
    const u32 depth{is_layered ? 1U : GetMipDepth(level)};
    return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth,
                                         GetMipBlockHeight(level), GetMipBlockDepth(level));
    if (is_tiled) {
        return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
                                             depth, GetMipBlockHeight(level),
                                             GetMipBlockDepth(level));
    } else if (as_host_size || IsBuffer()) {
        return GetBytesPerPixel() * width * height * depth;
    } else {
        // Linear Texture Case
        return pitch * height * depth;
    }
}

bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
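A compact model of the new three-way branch; the block-linear case is reduced to a placeholder stand-in, since Tegra::Texture::CalculateSize is out of scope here:

#include <cstddef>

// Hypothetical stand-in for the real block-linear size computation.
std::size_t CalculateTiledSize(std::size_t bpp, std::size_t width, std::size_t height,
                               std::size_t depth) {
    return bpp * width * height * depth; // real code accounts for GOB/block dimensions
}

std::size_t InnerMipSize(bool is_tiled, bool as_host_size, bool is_buffer,
                         std::size_t bpp, std::size_t width, std::size_t height,
                         std::size_t depth, std::size_t pitch) {
    if (is_tiled) {
        // Guest copies keep the block-linear layout; host copies are linearized.
        return CalculateTiledSize(bpp, width, height, depth);
    } else if (as_host_size || is_buffer) {
        return bpp * width * height * depth;
    } else {
        // Linear guest textures are addressed by row pitch, not by width.
        return pitch * height * depth;
    }
}

Worked example with illustrative numbers: for a linear 256x128 RGBA8 mip with a 1024-byte pitch, the guest size is pitch * height * depth = 1024 * 128 * 1 = 131072 bytes, and the host size is 4 * 256 * 128 * 1 = 131072 bytes; the two only coincide because the pitch happens to equal width * bytes-per-pixel.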
@@ -116,10 +116,10 @@ public:
        std::lock_guard lock{mutex};
        auto& maxwell3d = system.GPU().Maxwell3D();

        if (!maxwell3d.dirty_flags.zeta_buffer) {
        if (!maxwell3d.dirty.depth_buffer) {
            return depth_buffer.view;
        }
        maxwell3d.dirty_flags.zeta_buffer = false;
        maxwell3d.dirty.depth_buffer = false;

        const auto& regs{maxwell3d.regs};
        const auto gpu_addr{regs.zeta.Address()};
@@ -133,11 +133,11 @@ public:
            regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
        auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
        if (depth_buffer.target)
            depth_buffer.target->MarkAsRenderTarget(false);
            depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
        depth_buffer.target = surface_view.first;
        depth_buffer.view = surface_view.second;
        if (depth_buffer.target)
            depth_buffer.target->MarkAsRenderTarget(true);
            depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
        return surface_view.second;
    }

@@ -145,10 +145,10 @@ public:
        std::lock_guard lock{mutex};
        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
        auto& maxwell3d = system.GPU().Maxwell3D();
        if (!maxwell3d.dirty_flags.color_buffer[index]) {
        if (!maxwell3d.dirty.render_target[index]) {
            return render_targets[index].view;
        }
        maxwell3d.dirty_flags.color_buffer.reset(index);
        maxwell3d.dirty.render_target[index] = false;

        const auto& regs{maxwell3d.regs};
        if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
@@ -167,11 +167,11 @@ public:
        auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                                       preserve_contents, true);
        if (render_targets[index].target)
            render_targets[index].target->MarkAsRenderTarget(false);
            render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
        render_targets[index].target = surface_view.first;
        render_targets[index].view = surface_view.second;
        if (render_targets[index].target)
            render_targets[index].target->MarkAsRenderTarget(true);
            render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
        return surface_view.second;
    }

@@ -191,7 +191,7 @@ public:
        if (depth_buffer.target == nullptr) {
            return;
        }
        depth_buffer.target->MarkAsRenderTarget(false);
        depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
        depth_buffer.target = nullptr;
        depth_buffer.view = nullptr;
    }
@@ -200,7 +200,7 @@ public:
        if (render_targets[index].target == nullptr) {
            return;
        }
        render_targets[index].target->MarkAsRenderTarget(false);
        render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
        render_targets[index].target = nullptr;
        render_targets[index].view = nullptr;
    }
@@ -270,6 +270,17 @@ protected:
    // and reading it from a separate buffer.
    virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;

    void ManageRenderTargetUnregister(TSurface& surface) {
        auto& maxwell3d = system.GPU().Maxwell3D();
        const u32 index = surface->GetRenderTarget();
        if (index == DEPTH_RT) {
            maxwell3d.dirty.depth_buffer = true;
        } else {
            maxwell3d.dirty.render_target[index] = true;
        }
        maxwell3d.dirty.render_settings = true;
    }

    void Register(TSurface surface) {
        const GPUVAddr gpu_addr = surface->GetGpuAddr();
        const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
@@ -294,6 +305,9 @@ protected:
        if (guard_render_targets && surface->IsProtected()) {
            return;
        }
        if (!guard_render_targets && surface->IsRenderTarget()) {
            ManageRenderTargetUnregister(surface);
        }
        const GPUVAddr gpu_addr = surface->GetGpuAddr();
        const CacheAddr cache_ptr = surface->GetCacheAddr();
        const std::size_t size = surface->GetSizeInBytes();
@@ -649,15 +663,6 @@ private:
            }
            return {current_surface, *view};
        }
        // The next case is unsafe, so if we are in accurate GPU emulation, just skip it
        if (Settings::values.use_accurate_gpu_emulation) {
            return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                  MatchTopologyResult::FullMatch);
        }
        // This is the case where the texture is a part of the parent.
        if (current_surface->MatchesSubTexture(params, gpu_addr)) {
            return RebuildSurface(current_surface, params, is_render);
        }
    } else {
        // If there are many overlaps, odds are they are subtextures of the candidate
        // surface. We try to construct a new surface based on the candidate parameters,
@@ -793,6 +798,9 @@ private:
    static constexpr u64 registry_page_size{1 << registry_page_bits};
    std::unordered_map<CacheAddr, std::vector<TSurface>> registry;

    static constexpr u32 DEPTH_RT = 8;
    static constexpr u32 NO_RT = 0xFFFFFFFF;

    // The L1 Cache is used for fast texture lookup before checking the overlaps.
    // This avoids calculating size and other stuff.
    std::unordered_map<CacheAddr, TSurface> l1_cache;
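Taken together, the texture-cache hunks replace the old dirty_flags.zeta_buffer/color_buffer pair with a dirty structure and make Unregister re-flag any slot whose surface was evicted, so the next GetColorBufferSurface/GetDepthBufferSurface call rebuilds the view instead of returning a stale one. A hedged sketch of that flow; the struct layout is inferred from the hunks, and the NO_RT guard is an extra safety check added here (the real ManageRenderTargetUnregister is only reached for bound render targets):

#include <array>
#include <cstdint>

// Assumed shape of the new dirty-flag structure, inferred from the hunks above.
struct DirtyRegs {
    bool depth_buffer = false;
    std::array<bool, 8> render_target{};
    bool render_settings = false;
};

constexpr std::uint32_t DEPTH_RT = 8;       // sentinel slot used for the depth buffer
constexpr std::uint32_t NO_RT = 0xFFFFFFFF; // "not bound to any slot"

// Mirrors ManageRenderTargetUnregister: evicting a bound surface re-flags its slot.
void FlagUnregistered(DirtyRegs& dirty, std::uint32_t index) {
    if (index == DEPTH_RT) {
        dirty.depth_buffer = true;
    } else if (index != NO_RT) {
        dirty.render_target[index] = true;
    }
    dirty.render_settings = true;
}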