Compare commits

..

1 Commits

Author SHA1 Message Date
Lody
bd11b10298 shader: rewrite LOP3.LUT
shader: opt
2022-03-07 12:15:34 +08:00
5 changed files with 158 additions and 195 deletions

View File

@@ -285,141 +285,72 @@ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemory
return ResultSuccess;
}
ResultCode KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
// Validate the mapping request.
R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
ResultInvalidMemoryRegion);
// Lock the table.
ResultCode KPageTable::MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
KScopedLightLock lk(general_lock);
// Verify that the source memory is normal heap.
KMemoryState src_state{};
KMemoryPermission src_perm{};
std::size_t num_src_allocator_blocks{};
R_TRY(this->CheckMemoryState(&src_state, &src_perm, nullptr, &num_src_allocator_blocks,
src_address, size, KMemoryState::All, KMemoryState::Normal,
KMemoryPermission::All, KMemoryPermission::UserReadWrite,
KMemoryAttribute::All, KMemoryAttribute::None));
const std::size_t num_pages{size / PageSize};
// Verify that the destination memory is unmapped.
std::size_t num_dst_allocator_blocks{};
R_TRY(this->CheckMemoryState(&num_dst_allocator_blocks, dst_address, size, KMemoryState::All,
KMemoryState::Free, KMemoryPermission::None,
KMemoryPermission::None, KMemoryAttribute::None,
KMemoryAttribute::None));
KMemoryState state{};
KMemoryPermission perm{};
CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, nullptr, src_addr, size,
KMemoryState::All, KMemoryState::Normal, KMemoryPermission::All,
KMemoryPermission::UserReadWrite, KMemoryAttribute::Mask,
KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
// Map the code memory.
{
// Determine the number of pages being operated on.
const std::size_t num_pages = size / PageSize;
// Create page groups for the memory being mapped.
KPageLinkedList pg;
AddRegionToPages(src_address, num_pages, pg);
// Reprotect the source as kernel-read/not mapped.
const auto new_perm = static_cast<KMemoryPermission>(KMemoryPermission::KernelRead |
KMemoryPermission::NotMapped);
R_TRY(Operate(src_address, num_pages, new_perm, OperationType::ChangePermissions));
// Ensure that we unprotect the source pages on failure.
auto unprot_guard = SCOPE_GUARD({
ASSERT(this->Operate(src_address, num_pages, src_perm, OperationType::ChangePermissions)
.IsSuccess());
});
// Map the alias pages.
R_TRY(MapPages(dst_address, pg, new_perm));
// We successfully mapped the alias pages, so we don't need to unprotect the src pages on
// failure.
unprot_guard.Cancel();
// Apply the memory block updates.
block_manager->Update(src_address, num_pages, src_state, new_perm,
KMemoryAttribute::Locked);
block_manager->Update(dst_address, num_pages, KMemoryState::AliasCode, new_perm,
KMemoryAttribute::None);
if (IsRegionMapped(dst_addr, size)) {
return ResultInvalidCurrentMemory;
}
KPageLinkedList page_linked_list;
AddRegionToPages(src_addr, num_pages, page_linked_list);
{
auto block_guard = detail::ScopeExit(
[&] { Operate(src_addr, num_pages, perm, OperationType::ChangePermissions); });
CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::None,
OperationType::ChangePermissions));
CASCADE_CODE(MapPages(dst_addr, page_linked_list, KMemoryPermission::None));
block_guard.Cancel();
}
block_manager->Update(src_addr, num_pages, state, KMemoryPermission::None,
KMemoryAttribute::Locked);
block_manager->Update(dst_addr, num_pages, KMemoryState::AliasCode);
return ResultSuccess;
}
ResultCode KPageTable::UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
// Validate the mapping request.
R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
ResultInvalidMemoryRegion);
// Lock the table.
ResultCode KPageTable::UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
KScopedLightLock lk(general_lock);
// Verify that the source memory is locked normal heap.
std::size_t num_src_allocator_blocks{};
R_TRY(this->CheckMemoryState(std::addressof(num_src_allocator_blocks), src_address, size,
KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None,
KMemoryPermission::None, KMemoryAttribute::All,
KMemoryAttribute::Locked));
if (!size) {
return ResultSuccess;
}
// Verify that the destination memory is aliasable code.
std::size_t num_dst_allocator_blocks{};
R_TRY(this->CheckMemoryStateContiguous(
std::addressof(num_dst_allocator_blocks), dst_address, size, KMemoryState::FlagCanCodeAlias,
const std::size_t num_pages{size / PageSize};
CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, nullptr, src_addr, size,
KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None,
KMemoryPermission::None, KMemoryAttribute::Mask,
KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
KMemoryState state{};
CASCADE_CODE(CheckMemoryState(
&state, nullptr, nullptr, nullptr, dst_addr, PageSize, KMemoryState::FlagCanCodeAlias,
KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None,
KMemoryAttribute::All, KMemoryAttribute::None));
KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
CASCADE_CODE(CheckMemoryState(dst_addr, size, KMemoryState::All, state, KMemoryPermission::None,
KMemoryPermission::None, KMemoryAttribute::Mask,
KMemoryAttribute::None));
CASCADE_CODE(Operate(dst_addr, num_pages, KMemoryPermission::None, OperationType::Unmap));
// Determine whether any pages being unmapped are code.
bool any_code_pages = false;
{
KMemoryBlockManager::const_iterator it = block_manager->FindIterator(dst_address);
while (true) {
// Get the memory info.
const KMemoryInfo info = it->GetMemoryInfo();
block_manager->Update(dst_addr, num_pages, KMemoryState::Free);
block_manager->Update(src_addr, num_pages, KMemoryState::Normal,
KMemoryPermission::UserReadWrite);
// Check if the memory has code flag.
if ((info.GetState() & KMemoryState::FlagCode) != KMemoryState::None) {
any_code_pages = true;
break;
}
// Check if we're done.
if (dst_address + size - 1 <= info.GetLastAddress()) {
break;
}
// Advance.
++it;
}
}
// Ensure that we maintain the instruction cache.
bool reprotected_pages = false;
SCOPE_EXIT({
if (reprotected_pages && any_code_pages) {
system.InvalidateCpuInstructionCacheRange(dst_address, size);
}
});
// Unmap.
{
// Determine the number of pages being operated on.
const std::size_t num_pages = size / PageSize;
// Unmap the aliased copy of the pages.
R_TRY(Operate(dst_address, num_pages, KMemoryPermission::None, OperationType::Unmap));
// Try to set the permissions for the source pages back to what they should be.
R_TRY(Operate(src_address, num_pages, KMemoryPermission::UserReadWrite,
OperationType::ChangePermissions));
// Apply the memory block updates.
block_manager->Update(dst_address, num_pages, KMemoryState::None);
block_manager->Update(src_address, num_pages, KMemoryState::Normal,
KMemoryPermission::UserReadWrite);
// Note that we reprotected pages.
reprotected_pages = true;
}
system.InvalidateCpuInstructionCacheRange(dst_addr, size);
return ResultSuccess;
}

View File

@@ -36,8 +36,8 @@ public:
KMemoryManager::Pool pool);
ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, KMemoryState state,
KMemoryPermission perm);
ResultCode MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
ResultCode UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
ResultCode MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
ResultCode UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
ResultCode UnmapProcessMemory(VAddr dst_addr, std::size_t size, KPageTable& src_page_table,
VAddr src_addr);
ResultCode MapPhysicalMemory(VAddr addr, std::size_t size);

View File

@@ -2,6 +2,30 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// This file contains code from Ryujinx
// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx
// The sections using code from Ryujinx are marked with a link to the original version
// MIT License
//
// Copyright (c) Ryujinx Team and Contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
// associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
@@ -13,59 +37,87 @@ namespace {
// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
u64 ttbl) {
IR::U32 r{ir.Imm32(0)};
const IR::U32 not_a{ir.BitwiseNot(a)};
const IR::U32 not_b{ir.BitwiseNot(b)};
const IR::U32 not_c{ir.BitwiseNot(c)};
if (ttbl & 0x01) {
// r |= ~a & ~b & ~c;
const auto lhs{ir.BitwiseAnd(not_a, not_b)};
const auto rhs{ir.BitwiseAnd(lhs, not_c)};
r = ir.BitwiseOr(r, rhs);
std::optional<IR::U32> value;
// Encode into gray code.
u32 map = ttbl & 1;
map |= ((ttbl >> 1) & 1) << 4;
map |= ((ttbl >> 2) & 1) << 1;
map |= ((ttbl >> 3) & 1) << 5;
map |= ((ttbl >> 4) & 1) << 3;
map |= ((ttbl >> 5) & 1) << 7;
map |= ((ttbl >> 6) & 1) << 2;
map |= ((ttbl >> 7) & 1) << 6;
u32 visited = 0;
for (u32 index = 0; index < 8 && visited != 0xff; index++) {
if ((map & (1 << index)) == 0) {
continue;
}
const auto RotateLeft4 = [](u32 value, u32 shift) {
return ((value << shift) | (value >> (4 - shift))) & 0xf;
};
u32 mask = 0;
for (u32 size = 4; size != 0; size >>= 1) {
mask = RotateLeft4((1 << size) - 1, index & 3) << (index & 4);
if ((map & mask) == mask) {
break;
}
}
// The mask should wrap, if we are on the high row, shift to low etc.
const u32 mask2 = (index & 4) != 0 ? mask >> 4 : mask << 4;
if ((map & mask2) == mask2) {
mask |= mask2;
}
if ((mask & visited) == mask) {
continue;
}
const bool not_a = (mask & 0x33) != 0;
const bool not_b = (mask & 0x99) != 0;
const bool not_c = (mask & 0x0f) != 0;
const bool a_changes = (mask & 0xcc) != 0 && not_a;
const bool b_changes = (mask & 0x66) != 0 && not_b;
const bool c_changes = (mask & 0xf0) != 0 && not_c;
std::optional<IR::U32> local_value;
const auto And = [&](const IR::U32& source, bool inverted) {
IR::U32 result = inverted ? ir.BitwiseNot(source) : source;
if (local_value) {
local_value = ir.BitwiseAnd(*local_value, result);
} else {
local_value = result;
}
};
if (!a_changes) {
And(a, not_a);
}
if (!b_changes) {
And(b, not_b);
}
if (!c_changes) {
And(c, not_c);
}
if (value) {
value = ir.BitwiseOr(*value, *local_value);
} else {
value = local_value;
}
visited |= mask;
}
if (ttbl & 0x02) {
// r |= ~a & ~b & c;
const auto lhs{ir.BitwiseAnd(not_a, not_b)};
const auto rhs{ir.BitwiseAnd(lhs, c)};
r = ir.BitwiseOr(r, rhs);
}
if (ttbl & 0x04) {
// r |= ~a & b & ~c;
const auto lhs{ir.BitwiseAnd(not_a, b)};
const auto rhs{ir.BitwiseAnd(lhs, not_c)};
r = ir.BitwiseOr(r, rhs);
}
if (ttbl & 0x08) {
// r |= ~a & b & c;
const auto lhs{ir.BitwiseAnd(not_a, b)};
const auto rhs{ir.BitwiseAnd(lhs, c)};
r = ir.BitwiseOr(r, rhs);
}
if (ttbl & 0x10) {
// r |= a & ~b & ~c;
const auto lhs{ir.BitwiseAnd(a, not_b)};
const auto rhs{ir.BitwiseAnd(lhs, not_c)};
r = ir.BitwiseOr(r, rhs);
}
if (ttbl & 0x20) {
// r |= a & ~b & c;
const auto lhs{ir.BitwiseAnd(a, not_b)};
const auto rhs{ir.BitwiseAnd(lhs, c)};
r = ir.BitwiseOr(r, rhs);
}
if (ttbl & 0x40) {
// r |= a & b & ~c;
const auto lhs{ir.BitwiseAnd(a, b)};
const auto rhs{ir.BitwiseAnd(lhs, not_c)};
r = ir.BitwiseOr(r, rhs);
}
if (ttbl & 0x80) {
// r |= a & b & c;
const auto lhs{ir.BitwiseAnd(a, b)};
const auto rhs{ir.BitwiseAnd(lhs, c)};
r = ir.BitwiseOr(r, rhs);
}
return r;
return *value;
}
IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {

View File

@@ -53,6 +53,7 @@ void MaxwellDMA::Launch() {
// TODO(Subv): Perform more research and implement all features of this engine.
const LaunchDMA& launch = regs.launch_dma;
ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
ASSERT(regs.dst_params.origin.x == 0);
@@ -78,7 +79,6 @@ void MaxwellDMA::Launch() {
CopyPitchToBlockLinear();
}
}
ReleaseSemaphore();
}
void MaxwellDMA::CopyPitchToPitch() {
@@ -244,22 +244,4 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::ReleaseSemaphore() {
const auto type = regs.launch_dma.semaphore_type;
const GPUVAddr address = regs.semaphore.address;
switch (type) {
case LaunchDMA::SemaphoreType::NONE:
break;
case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE:
memory_manager.Write<u32>(address, regs.semaphore.payload);
break;
case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE:
memory_manager.Write<u64>(address, static_cast<u64>(regs.semaphore.payload));
memory_manager.Write<u64>(address + 8, system.GPU().GetTicks());
break;
default:
UNREACHABLE_MSG("Unknown semaphore type: {}", static_cast<u32>(type.Value()));
}
}
} // namespace Tegra::Engines

View File

@@ -224,8 +224,6 @@ private:
void FastCopyBlockLinearToPitch();
void ReleaseSemaphore();
Core::System& system;
MemoryManager& memory_manager;