Compare commits

..

1 Commits

Author SHA1 Message Date
Lody
bd11b10298 shader: rewrite LOP3.LUT
shader: opt
2022-03-07 12:15:34 +08:00
10 changed files with 184 additions and 839 deletions

View File

@@ -326,9 +326,7 @@ struct System::Impl {
is_powered_on = false;
exit_lock = false;
if (gpu_core != nullptr) {
gpu_core->NotifyShutdown();
}
gpu_core->NotifyShutdown();
services.reset();
service_manager.reset();

View File

@@ -42,20 +42,11 @@ public:
context.MakeCurrent();
}
/// Calls DoneCurrent() on the held context, unless the active flag has been cleared.
~Scoped() {
    if (!active) {
        // Nothing to release.
        return;
    }
    context.DoneCurrent();
}
/// In the event that context was destroyed before the Scoped is destroyed, this provides a
/// mechanism to prevent calling a destroyed object's method during the destructor
void Cancel() {
active = false;
context.DoneCurrent();
}
private:
GraphicsContext& context;
bool active{true};
};
/// Calls MakeCurrent on the context and calls DoneCurrent when the scope for the returned value

View File

@@ -285,141 +285,72 @@ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemory
return ResultSuccess;
}
ResultCode KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
// Validate the mapping request.
R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
ResultInvalidMemoryRegion);
// Lock the table.
ResultCode KPageTable::MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
KScopedLightLock lk(general_lock);
// Verify that the source memory is normal heap.
KMemoryState src_state{};
KMemoryPermission src_perm{};
std::size_t num_src_allocator_blocks{};
R_TRY(this->CheckMemoryState(&src_state, &src_perm, nullptr, &num_src_allocator_blocks,
src_address, size, KMemoryState::All, KMemoryState::Normal,
KMemoryPermission::All, KMemoryPermission::UserReadWrite,
KMemoryAttribute::All, KMemoryAttribute::None));
const std::size_t num_pages{size / PageSize};
// Verify that the destination memory is unmapped.
std::size_t num_dst_allocator_blocks{};
R_TRY(this->CheckMemoryState(&num_dst_allocator_blocks, dst_address, size, KMemoryState::All,
KMemoryState::Free, KMemoryPermission::None,
KMemoryPermission::None, KMemoryAttribute::None,
KMemoryAttribute::None));
KMemoryState state{};
KMemoryPermission perm{};
CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, nullptr, src_addr, size,
KMemoryState::All, KMemoryState::Normal, KMemoryPermission::All,
KMemoryPermission::UserReadWrite, KMemoryAttribute::Mask,
KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
// Map the code memory.
{
// Determine the number of pages being operated on.
const std::size_t num_pages = size / PageSize;
// Create page groups for the memory being mapped.
KPageLinkedList pg;
AddRegionToPages(src_address, num_pages, pg);
// Reprotect the source as kernel-read/not mapped.
const auto new_perm = static_cast<KMemoryPermission>(KMemoryPermission::KernelRead |
KMemoryPermission::NotMapped);
R_TRY(Operate(src_address, num_pages, new_perm, OperationType::ChangePermissions));
// Ensure that we unprotect the source pages on failure.
auto unprot_guard = SCOPE_GUARD({
ASSERT(this->Operate(src_address, num_pages, src_perm, OperationType::ChangePermissions)
.IsSuccess());
});
// Map the alias pages.
R_TRY(MapPages(dst_address, pg, new_perm));
// We successfully mapped the alias pages, so we don't need to unprotect the src pages on
// failure.
unprot_guard.Cancel();
// Apply the memory block updates.
block_manager->Update(src_address, num_pages, src_state, new_perm,
KMemoryAttribute::Locked);
block_manager->Update(dst_address, num_pages, KMemoryState::AliasCode, new_perm,
KMemoryAttribute::None);
if (IsRegionMapped(dst_addr, size)) {
return ResultInvalidCurrentMemory;
}
KPageLinkedList page_linked_list;
AddRegionToPages(src_addr, num_pages, page_linked_list);
{
auto block_guard = detail::ScopeExit(
[&] { Operate(src_addr, num_pages, perm, OperationType::ChangePermissions); });
CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::None,
OperationType::ChangePermissions));
CASCADE_CODE(MapPages(dst_addr, page_linked_list, KMemoryPermission::None));
block_guard.Cancel();
}
block_manager->Update(src_addr, num_pages, state, KMemoryPermission::None,
KMemoryAttribute::Locked);
block_manager->Update(dst_addr, num_pages, KMemoryState::AliasCode);
return ResultSuccess;
}
ResultCode KPageTable::UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
// Validate the mapping request.
R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
ResultInvalidMemoryRegion);
// Lock the table.
ResultCode KPageTable::UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
KScopedLightLock lk(general_lock);
// Verify that the source memory is locked normal heap.
std::size_t num_src_allocator_blocks{};
R_TRY(this->CheckMemoryState(std::addressof(num_src_allocator_blocks), src_address, size,
KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None,
KMemoryPermission::None, KMemoryAttribute::All,
KMemoryAttribute::Locked));
if (!size) {
return ResultSuccess;
}
// Verify that the destination memory is aliasable code.
std::size_t num_dst_allocator_blocks{};
R_TRY(this->CheckMemoryStateContiguous(
std::addressof(num_dst_allocator_blocks), dst_address, size, KMemoryState::FlagCanCodeAlias,
const std::size_t num_pages{size / PageSize};
CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, nullptr, src_addr, size,
KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None,
KMemoryPermission::None, KMemoryAttribute::Mask,
KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
KMemoryState state{};
CASCADE_CODE(CheckMemoryState(
&state, nullptr, nullptr, nullptr, dst_addr, PageSize, KMemoryState::FlagCanCodeAlias,
KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None,
KMemoryAttribute::All, KMemoryAttribute::None));
KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
CASCADE_CODE(CheckMemoryState(dst_addr, size, KMemoryState::All, state, KMemoryPermission::None,
KMemoryPermission::None, KMemoryAttribute::Mask,
KMemoryAttribute::None));
CASCADE_CODE(Operate(dst_addr, num_pages, KMemoryPermission::None, OperationType::Unmap));
// Determine whether any pages being unmapped are code.
bool any_code_pages = false;
{
KMemoryBlockManager::const_iterator it = block_manager->FindIterator(dst_address);
while (true) {
// Get the memory info.
const KMemoryInfo info = it->GetMemoryInfo();
block_manager->Update(dst_addr, num_pages, KMemoryState::Free);
block_manager->Update(src_addr, num_pages, KMemoryState::Normal,
KMemoryPermission::UserReadWrite);
// Check if the memory has code flag.
if ((info.GetState() & KMemoryState::FlagCode) != KMemoryState::None) {
any_code_pages = true;
break;
}
// Check if we're done.
if (dst_address + size - 1 <= info.GetLastAddress()) {
break;
}
// Advance.
++it;
}
}
// Ensure that we maintain the instruction cache.
bool reprotected_pages = false;
SCOPE_EXIT({
if (reprotected_pages && any_code_pages) {
system.InvalidateCpuInstructionCacheRange(dst_address, size);
}
});
// Unmap.
{
// Determine the number of pages being operated on.
const std::size_t num_pages = size / PageSize;
// Unmap the aliased copy of the pages.
R_TRY(Operate(dst_address, num_pages, KMemoryPermission::None, OperationType::Unmap));
// Try to set the permissions for the source pages back to what they should be.
R_TRY(Operate(src_address, num_pages, KMemoryPermission::UserReadWrite,
OperationType::ChangePermissions));
// Apply the memory block updates.
block_manager->Update(dst_address, num_pages, KMemoryState::None);
block_manager->Update(src_address, num_pages, KMemoryState::Normal,
KMemoryPermission::UserReadWrite);
// Note that we reprotected pages.
reprotected_pages = true;
}
system.InvalidateCpuInstructionCacheRange(dst_addr, size);
return ResultSuccess;
}

View File

@@ -36,8 +36,8 @@ public:
KMemoryManager::Pool pool);
ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, KMemoryState state,
KMemoryPermission perm);
ResultCode MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
ResultCode UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
ResultCode MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
ResultCode UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
ResultCode UnmapProcessMemory(VAddr dst_addr, std::size_t size, KPageTable& src_page_table,
VAddr src_addr);
ResultCode MapPhysicalMemory(VAddr addr, std::size_t size);
@@ -253,9 +253,7 @@ public:
/// Logical negation of IsOutsideASLRRegion() for the same address and size.
constexpr bool IsInsideASLRRegion(VAddr address, std::size_t size) const {
    const bool is_outside = IsOutsideASLRRegion(address, size);
    return !is_outside;
}
/// Returns the guard page count: 1 when IsKernel() is true, 4 otherwise.
constexpr std::size_t GetNumGuardPages() const {
    if (IsKernel()) {
        return 1;
    }
    return 4;
}
PAddr GetPhysicalAddr(VAddr addr) const {
const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits];
ASSERT(backing_addr);
@@ -277,6 +275,10 @@ private:
return is_aslr_enabled;
}
/// Returns the guard page count: 1 when IsKernel() is true, 4 otherwise.
constexpr std::size_t GetNumGuardPages() const {
    if (IsKernel()) {
        return 1;
    }
    return 4;
}
constexpr bool ContainsPages(VAddr addr, std::size_t num_pages) const {
return (address_space_start <= addr) &&
(num_pages <= (address_space_end - address_space_start) / PageSize) &&

View File

@@ -288,7 +288,7 @@ public:
}
bool ValidateRegionForMap(Kernel::KPageTable& page_table, VAddr start, std::size_t size) const {
const std::size_t padding_size{page_table.GetNumGuardPages() * Kernel::PageSize};
constexpr std::size_t padding_size{4 * Kernel::PageSize};
const auto start_info{page_table.QueryInfo(start - 1)};
if (start_info.state != Kernel::KMemoryState::Free) {
@@ -308,69 +308,31 @@ public:
return (start + size + padding_size) <= (end_info.GetAddress() + end_info.GetSize());
}
ResultCode GetAvailableMapRegion(Kernel::KPageTable& page_table, u64 size, VAddr& out_addr) {
size = Common::AlignUp(size, Kernel::PageSize);
size += page_table.GetNumGuardPages() * Kernel::PageSize * 4;
const auto is_region_available = [&](VAddr addr) {
const auto end_addr = addr + size;
while (addr < end_addr) {
if (system.Memory().IsValidVirtualAddress(addr)) {
return false;
}
if (!page_table.IsInsideAddressSpace(out_addr, size)) {
return false;
}
if (page_table.IsInsideHeapRegion(out_addr, size)) {
return false;
}
if (page_table.IsInsideAliasRegion(out_addr, size)) {
return false;
}
addr += Kernel::PageSize;
}
return true;
};
bool succeeded = false;
const auto map_region_end =
page_table.GetAliasCodeRegionStart() + page_table.GetAliasCodeRegionSize();
while (current_map_addr < map_region_end) {
if (is_region_available(current_map_addr)) {
succeeded = true;
break;
}
current_map_addr += 0x100000;
}
if (!succeeded) {
UNREACHABLE_MSG("Out of address space!");
return Kernel::ResultOutOfMemory;
}
out_addr = current_map_addr;
current_map_addr += size;
return ResultSuccess;
/// Repeatedly draws a random whole-page offset from the start of the alias-code region and
/// returns the first resulting address that is inside the address space while being inside
/// neither the heap region nor the alias region.
///
/// @param page_table Page table queried for the region bounds and containment checks.
/// @param size       Size in bytes of the range that is to be placed.
/// @return The first candidate address that passes all three checks.
VAddr GetRandomMapRegion(const Kernel::KPageTable& page_table, std::size_t size) const {
    // Upper bound, in pages relative to the region start, handed to the random generator.
    // NOTE(review): the subtraction looks like it wraps if size exceeds
    // GetAliasCodeRegionSize() - confirm callers guarantee that size fits.
    const std::size_t page_limit{(page_table.GetAliasCodeRegionSize() - size) >>
                                 Kernel::PageBits};

    for (;;) {
        const auto page_offset = Kernel::KSystemControl::GenerateRandomRange(0, page_limit);
        const VAddr candidate =
            page_table.GetAliasCodeRegionStart() + (page_offset << Kernel::PageBits);

        // Same checks, in the same short-circuit order, as the retry condition they replace.
        const bool usable = page_table.IsInsideAddressSpace(candidate, size) &&
                            !page_table.IsInsideHeapRegion(candidate, size) &&
                            !page_table.IsInsideAliasRegion(candidate, size);
        if (usable) {
            return candidate;
        }
    }
}
ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr base_addr, u64 size) {
auto& page_table{process->PageTable()};
VAddr addr{};
ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr baseAddress,
u64 size) const {
for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) {
R_TRY(GetAvailableMapRegion(page_table, size, addr));
auto& page_table{process->PageTable()};
const VAddr addr{GetRandomMapRegion(page_table, size)};
const ResultCode result{page_table.MapCodeMemory(addr, baseAddress, size)};
const ResultCode result{page_table.MapCodeMemory(addr, base_addr, size)};
if (result == Kernel::ResultInvalidCurrentMemory) {
continue;
}
R_TRY(result);
CASCADE_CODE(result);
if (ValidateRegionForMap(page_table, addr, size)) {
return addr;
@@ -381,7 +343,7 @@ public:
}
ResultVal<VAddr> MapNro(Kernel::KProcess* process, VAddr nro_addr, std::size_t nro_size,
VAddr bss_addr, std::size_t bss_size, std::size_t size) {
VAddr bss_addr, std::size_t bss_size, std::size_t size) const {
for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) {
auto& page_table{process->PageTable()};
VAddr addr{};
@@ -635,7 +597,6 @@ public:
LOG_WARNING(Service_LDR, "(STUBBED) called");
initialized = true;
current_map_addr = system.CurrentProcess()->PageTable().GetAliasCodeRegionStart();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
@@ -646,7 +607,6 @@ private:
std::map<VAddr, NROInfo> nro;
std::map<VAddr, std::vector<SHA256Hash>> nrr;
VAddr current_map_addr{};
bool IsValidNROHash(const SHA256Hash& hash) const {
return std::any_of(nrr.begin(), nrr.end(), [&hash](const auto& p) {

View File

@@ -2,6 +2,30 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// This file contains code from Ryujinx
// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx
// The sections using code from Ryujinx are marked with a link to the original version
// MIT License
//
// Copyright (c) Ryujinx Team and Contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
// associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
@@ -13,535 +37,87 @@ namespace {
// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
u64 ttbl) {
switch (ttbl) {
// generated code, do not edit manually
case 0:
return ir.Imm32(0);
case 1:
return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseOr(b, c)));
case 2:
return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
case 3:
return ir.BitwiseNot(ir.BitwiseOr(a, b));
case 4:
return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
case 5:
return ir.BitwiseNot(ir.BitwiseOr(a, c));
case 6:
return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
case 7:
return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseAnd(b, c)));
case 8:
return ir.BitwiseAnd(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
case 9:
return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseXor(b, c)));
case 10:
return ir.BitwiseAnd(c, ir.BitwiseNot(a));
case 11:
return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(c, ir.BitwiseNot(b)));
case 12:
return ir.BitwiseAnd(b, ir.BitwiseNot(a));
case 13:
return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, ir.BitwiseNot(c)));
case 14:
return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
case 15:
return ir.BitwiseNot(a);
case 16:
return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
case 17:
return ir.BitwiseNot(ir.BitwiseOr(b, c));
case 18:
return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
case 19:
return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseAnd(a, c)));
case 20:
return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
case 21:
return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
case 22:
return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
case 23:
return ir.BitwiseXor(ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)),
ir.BitwiseNot(a));
case 24:
return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
case 25:
return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c)));
case 26:
return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
case 27:
return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
case 28:
return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
case 29:
return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
case 30:
return ir.BitwiseXor(a, ir.BitwiseOr(b, c));
case 31:
return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)));
case 32:
return ir.BitwiseAnd(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
case 33:
return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
case 34:
return ir.BitwiseAnd(c, ir.BitwiseNot(b));
case 35:
return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
case 36:
return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(b, c));
case 37:
return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c)));
case 38:
return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
case 39:
return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
case 40:
return ir.BitwiseAnd(c, ir.BitwiseXor(a, b));
case 41:
return ir.BitwiseXor(ir.BitwiseOr(a, b),
ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)));
case 42:
return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
case 43:
return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)),
ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
case 44:
return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b));
case 45:
return ir.BitwiseXor(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
case 46:
return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(b, c));
case 47:
return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(b)), ir.BitwiseNot(a));
case 48:
return ir.BitwiseAnd(a, ir.BitwiseNot(b));
case 49:
return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, ir.BitwiseNot(c)));
case 50:
return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
case 51:
return ir.BitwiseNot(b);
case 52:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
case 53:
return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
case 54:
return ir.BitwiseXor(b, ir.BitwiseOr(a, c));
case 55:
return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)));
case 56:
return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b));
case 57:
return ir.BitwiseXor(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
case 58:
return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(a, c));
case 59:
return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseNot(b));
case 60:
return ir.BitwiseXor(a, b);
case 61:
return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, c)), ir.BitwiseXor(a, b));
case 62:
return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, b));
case 63:
return ir.BitwiseNot(ir.BitwiseAnd(a, b));
case 64:
return ir.BitwiseAnd(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
case 65:
return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
case 66:
return ir.BitwiseAnd(ir.BitwiseXor(a, c), ir.BitwiseXor(b, c));
case 67:
return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b)));
case 68:
return ir.BitwiseAnd(b, ir.BitwiseNot(c));
case 69:
return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
case 70:
return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
case 71:
return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
case 72:
return ir.BitwiseAnd(b, ir.BitwiseXor(a, c));
case 73:
return ir.BitwiseXor(ir.BitwiseOr(a, c),
ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)));
case 74:
return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c));
case 75:
return ir.BitwiseXor(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
case 76:
return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
case 77:
return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)),
ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
case 78:
return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(b, c));
case 79:
return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(c)), ir.BitwiseNot(a));
case 80:
return ir.BitwiseAnd(a, ir.BitwiseNot(c));
case 81:
return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, ir.BitwiseNot(b)));
case 82:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
case 83:
return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
case 84:
return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
case 85:
return ir.BitwiseNot(c);
case 86:
return ir.BitwiseXor(c, ir.BitwiseOr(a, b));
case 87:
return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)));
case 88:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c));
case 89:
return ir.BitwiseXor(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
case 90:
return ir.BitwiseXor(a, c);
case 91:
return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(a, c));
case 92:
return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(a, b));
case 93:
return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseNot(c));
case 94:
return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, c));
case 95:
return ir.BitwiseNot(ir.BitwiseAnd(a, c));
case 96:
return ir.BitwiseAnd(a, ir.BitwiseXor(b, c));
case 97:
return ir.BitwiseXor(ir.BitwiseOr(b, c),
ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)));
case 98:
return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c));
case 99:
return ir.BitwiseXor(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
case 100:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c));
case 101:
return ir.BitwiseXor(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
case 102:
return ir.BitwiseXor(b, c);
case 103:
return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(b, c));
case 104:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(c, ir.BitwiseAnd(a, b)));
case 105:
return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
case 106:
return ir.BitwiseXor(c, ir.BitwiseAnd(a, b));
case 107:
return ir.BitwiseXor(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)),
ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 108:
return ir.BitwiseXor(b, ir.BitwiseAnd(a, c));
case 109:
return ir.BitwiseXor(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)),
ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 110:
return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
case 111:
return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
case 112:
return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
case 113:
return ir.BitwiseXor(ir.BitwiseOr(b, ir.BitwiseNot(a)),
ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
case 114:
return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, c));
case 115:
return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseNot(b));
case 116:
return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, b));
case 117:
return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseNot(c));
case 118:
return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
case 119:
return ir.BitwiseNot(ir.BitwiseAnd(b, c));
case 120:
return ir.BitwiseXor(a, ir.BitwiseAnd(b, c));
case 121:
return ir.BitwiseXor(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)),
ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 122:
return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
case 123:
return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
case 124:
return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
case 125:
return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
case 126:
return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
case 127:
return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseAnd(b, c)));
case 128:
return ir.BitwiseAnd(a, ir.BitwiseAnd(b, c));
case 129:
return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
case 130:
return ir.BitwiseAnd(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 131:
return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 132:
return ir.BitwiseAnd(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 133:
return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 134:
return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
case 135:
return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
case 136:
return ir.BitwiseAnd(b, c);
case 137:
return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 138:
return ir.BitwiseAnd(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
case 139:
return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
case 140:
return ir.BitwiseAnd(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
case 141:
return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, c)));
case 142:
return ir.BitwiseXor(a, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
case 143:
return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
case 144:
return ir.BitwiseAnd(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 145:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 146:
return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
case 147:
return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
case 148:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
case 149:
return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
case 150:
return ir.BitwiseXor(a, ir.BitwiseXor(b, c));
case 151:
return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)),
ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
case 152:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 153:
return ir.BitwiseXor(b, ir.BitwiseNot(c));
case 154:
return ir.BitwiseXor(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
case 155:
return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c)));
case 156:
return ir.BitwiseXor(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
case 157:
return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c)));
case 158:
return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseOr(b, c)));
case 159:
return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseXor(b, c)));
case 160:
return ir.BitwiseAnd(a, c);
case 161:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 162:
return ir.BitwiseAnd(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
case 163:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
case 164:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 165:
return ir.BitwiseXor(a, ir.BitwiseNot(c));
case 166:
return ir.BitwiseXor(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
case 167:
return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c)));
case 168:
return ir.BitwiseAnd(c, ir.BitwiseOr(a, b));
case 169:
return ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
case 170:
return c;
case 171:
return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
case 172:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
case 173:
return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 174:
return ir.BitwiseOr(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
case 175:
return ir.BitwiseOr(c, ir.BitwiseNot(a));
case 176:
return ir.BitwiseAnd(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
case 177:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(b, c)));
case 178:
return ir.BitwiseXor(b, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
case 179:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
case 180:
return ir.BitwiseXor(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
case 181:
return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c)));
case 182:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseOr(a, c)));
case 183:
return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseXor(a, c)));
case 184:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
case 185:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 186:
return ir.BitwiseOr(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
case 187:
return ir.BitwiseOr(c, ir.BitwiseNot(b));
case 188:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b));
case 189:
return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 190:
return ir.BitwiseOr(c, ir.BitwiseXor(a, b));
case 191:
return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
case 192:
return ir.BitwiseAnd(a, b);
case 193:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 194:
return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 195:
return ir.BitwiseXor(a, ir.BitwiseNot(b));
case 196:
return ir.BitwiseAnd(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
case 197:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(a, c)));
case 198:
return ir.BitwiseXor(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
case 199:
return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b)));
case 200:
return ir.BitwiseAnd(b, ir.BitwiseOr(a, c));
case 201:
return ir.BitwiseXor(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
case 202:
return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
case 203:
return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 204:
return b;
case 205:
return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
case 206:
return ir.BitwiseOr(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
case 207:
return ir.BitwiseOr(b, ir.BitwiseNot(a));
case 208:
return ir.BitwiseAnd(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
case 209:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(b, c)));
case 210:
return ir.BitwiseXor(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
case 211:
return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b)));
case 212:
return ir.BitwiseXor(c, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
case 213:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
case 214:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(c, ir.BitwiseOr(a, b)));
case 215:
return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseXor(a, b)));
case 216:
return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
case 217:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 218:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c));
case 219:
return ir.BitwiseOr(ir.BitwiseXor(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 220:
return ir.BitwiseOr(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
case 221:
return ir.BitwiseOr(b, ir.BitwiseNot(c));
case 222:
return ir.BitwiseOr(b, ir.BitwiseXor(a, c));
case 223:
return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
case 224:
return ir.BitwiseAnd(a, ir.BitwiseOr(b, c));
case 225:
return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
case 226:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
case 227:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 228:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
case 229:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 230:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c));
case 231:
return ir.BitwiseOr(ir.BitwiseXor(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
case 232:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
case 233:
return ir.BitwiseOr(ir.BitwiseAnd(a, b),
ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b)));
case 234:
return ir.BitwiseOr(c, ir.BitwiseAnd(a, b));
case 235:
return ir.BitwiseOr(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 236:
return ir.BitwiseOr(b, ir.BitwiseAnd(a, c));
case 237:
return ir.BitwiseOr(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 238:
return ir.BitwiseOr(b, c);
case 239:
return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
case 240:
return a;
case 241:
return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
case 242:
return ir.BitwiseOr(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
case 243:
return ir.BitwiseOr(a, ir.BitwiseNot(b));
case 244:
return ir.BitwiseOr(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
case 245:
return ir.BitwiseOr(a, ir.BitwiseNot(c));
case 246:
return ir.BitwiseOr(a, ir.BitwiseXor(b, c));
case 247:
return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
case 248:
return ir.BitwiseOr(a, ir.BitwiseAnd(b, c));
case 249:
return ir.BitwiseOr(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 250:
return ir.BitwiseOr(a, c);
case 251:
return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
case 252:
return ir.BitwiseOr(a, b);
case 253:
return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
case 254:
return ir.BitwiseOr(a, ir.BitwiseOr(b, c));
case 255:
return ir.Imm32(0xFFFFFFFF);
// end of generated code
std::optional<IR::U32> value;
// Encode into gray code.
u32 map = ttbl & 1;
map |= ((ttbl >> 1) & 1) << 4;
map |= ((ttbl >> 2) & 1) << 1;
map |= ((ttbl >> 3) & 1) << 5;
map |= ((ttbl >> 4) & 1) << 3;
map |= ((ttbl >> 5) & 1) << 7;
map |= ((ttbl >> 6) & 1) << 2;
map |= ((ttbl >> 7) & 1) << 6;
u32 visited = 0;
for (u32 index = 0; index < 8 && visited != 0xff; index++) {
if ((map & (1 << index)) == 0) {
continue;
}
const auto RotateLeft4 = [](u32 value, u32 shift) {
return ((value << shift) | (value >> (4 - shift))) & 0xf;
};
u32 mask = 0;
for (u32 size = 4; size != 0; size >>= 1) {
mask = RotateLeft4((1 << size) - 1, index & 3) << (index & 4);
if ((map & mask) == mask) {
break;
}
}
// The mask should wrap, if we are on the high row, shift to low etc.
const u32 mask2 = (index & 4) != 0 ? mask >> 4 : mask << 4;
if ((map & mask2) == mask2) {
mask |= mask2;
}
if ((mask & visited) == mask) {
continue;
}
const bool not_a = (mask & 0x33) != 0;
const bool not_b = (mask & 0x99) != 0;
const bool not_c = (mask & 0x0f) != 0;
const bool a_changes = (mask & 0xcc) != 0 && not_a;
const bool b_changes = (mask & 0x66) != 0 && not_b;
const bool c_changes = (mask & 0xf0) != 0 && not_c;
std::optional<IR::U32> local_value;
const auto And = [&](const IR::U32& source, bool inverted) {
IR::U32 result = inverted ? ir.BitwiseNot(source) : source;
if (local_value) {
local_value = ir.BitwiseAnd(*local_value, result);
} else {
local_value = result;
}
};
if (!a_changes) {
And(a, not_a);
}
if (!b_changes) {
And(b, not_b);
}
if (!c_changes) {
And(c, not_c);
}
if (value) {
value = ir.BitwiseOr(*value, *local_value);
} else {
value = local_value;
}
visited |= mask;
}
throw NotImplementedException("LOP3 with out of range ttbl");
return *value;
}
IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {

View File

@@ -1,92 +0,0 @@
# Copyright © 2022 degasus <markus@selfnet.de>
# This work is free. You can redistribute it and/or modify it under the
# terms of the Do What The Fuck You Want To Public License, Version 2,
# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
from itertools import product
# The primitive instructions.
# Each key is the IR call template; each value is
# (number of operands, cost, evaluator working on 8-bit truth tables).
OPS = {
    'ir.BitwiseAnd({}, {})': (2, 1, lambda x, y: x & y),
    'ir.BitwiseOr({}, {})': (2, 1, lambda x, y: x | y),
    'ir.BitwiseXor({}, {})': (2, 1, lambda x, y: x ^ y),
    # Only a tiny cost, as this can often be inlined in other instructions.
    'ir.BitwiseNot({})': (1, 0.1, lambda x: (~x) & 255),
}

# Our database of instruction combinations, keyed by truth table.
# Entries are stored by register() as (instruction, count, latency, costs).
optimized_calls = {}
def cmp(lhs, rhs):
    """Decide whether the candidate `rhs` should replace the stored entry `lhs`.

    Both entries are `(instruction, count, latency, costs)` tuples; `lhs` may
    be None when nothing has been stored yet.  Returns True when `lhs` is
    missing or ranks strictly worse than `rhs`, and False otherwise.

    Raises AssertionError when both entries rank identically but are not the
    same tuple.
    """
    if lhs is None:  # new entry
        return True

    # Ranking order: lower costs first, then the shorter instruction string,
    # then the lexicographically smaller instruction string.
    lhs_rank = (lhs[3], len(lhs[0]), lhs[0])
    rhs_rank = (rhs[3], len(rhs[0]), rhs[0])
    if lhs_rank != rhs_rank:
        return lhs_rank > rhs_rank

    assert lhs == rhs, "redundant instruction, bug in brute force"
    return False
def register(imm, instruction, count, latency):
    """Offer `instruction` as the implementation of truth table `imm`.

    The candidate is scored by the sum of its instruction count and latency
    and stored in `optimized_calls` when no entry exists yet or when `cmp`
    ranks it ahead of the stored one.

    Returns True when the database was updated, False otherwise.
    """
    candidate = (instruction, count, latency, count + latency)
    if not cmp(optimized_calls.get(imm), candidate):
        return False
    # New or better: take over the slot.
    optimized_calls[imm] = candidate
    return True
# Constants: all-zeros and all-ones are free.
register(0, 'ir.Imm32(0)', 0, 0)
register(255, 'ir.Imm32(0xFFFFFFFF)', 0, 0)

# Inputs: a, b, c are free as well; these are their truth-table columns.
ta = 0xF0
tb = 0xCC
tc = 0xAA
inputs = {ta: 'a', tb: 'b', tc: 'c'}
for table, name in inputs.items():
    register(table, name, 0, 0)
    # A NOT applied directly to an input is priced slightly below the generic
    # BitwiseNot in OPS.
    register(~table & 255, 'ir.BitwiseNot({})'.format(name), 0.099, 0.099)

# Try to combine values from the db with every primitive instruction.
# Whenever a combination beats the stored method, the db is updated; repeat
# until a full pass changes nothing.
while True:
    snapshot = optimized_calls.copy()
    improved = 0
    for template, (arity, op_cost, evaluate) in OPS.items():
        for operands in product(snapshot.items(), repeat=arity):
            # Unpack (transpose) the operand entries into per-field lists.
            tables = [table for table, _ in operands]
            exprs = [entry[0] for _, entry in operands]
            counts = [entry[1] for _, entry in operands]
            latencies = [entry[2] for _, entry in operands]
            if register(evaluate(*tables),
                        template.format(*exprs),
                        sum(counts) + op_cost,
                        max(latencies) + op_cost):
                improved += 1
    if not improved:
        # No update at all? So terminate.
        break

# Hacky output. Please improve me to output valid C++ instead.
s = """ case {imm}:
return {op};"""
for table in range(256):
    best_expression = optimized_calls[table][0]
    print(s.format(imm=table, op=best_expression))

View File

@@ -53,6 +53,7 @@ void MaxwellDMA::Launch() {
// TODO(Subv): Perform more research and implement all features of this engine.
const LaunchDMA& launch = regs.launch_dma;
ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
ASSERT(regs.dst_params.origin.x == 0);
@@ -78,7 +79,6 @@ void MaxwellDMA::Launch() {
CopyPitchToBlockLinear();
}
}
ReleaseSemaphore();
}
void MaxwellDMA::CopyPitchToPitch() {
@@ -244,22 +244,4 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::ReleaseSemaphore() {
const auto type = regs.launch_dma.semaphore_type;
const GPUVAddr address = regs.semaphore.address;
switch (type) {
case LaunchDMA::SemaphoreType::NONE:
break;
case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE:
memory_manager.Write<u32>(address, regs.semaphore.payload);
break;
case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE:
memory_manager.Write<u64>(address, static_cast<u64>(regs.semaphore.payload));
memory_manager.Write<u64>(address + 8, system.GPU().GetTicks());
break;
default:
UNREACHABLE_MSG("Unknown semaphore type: {}", static_cast<u32>(type.Value()));
}
}
} // namespace Tegra::Engines

View File

@@ -224,8 +224,6 @@ private:
void FastCopyBlockLinearToPitch();
void ReleaseSemaphore();
Core::System& system;
MemoryManager& memory_manager;

View File

@@ -50,7 +50,6 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor
gpu->BindRenderer(std::move(renderer));
return gpu;
} catch (const std::runtime_error& exception) {
scope.Cancel();
LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
return nullptr;
}