Compare commits

..

1 Commits

Author SHA1 Message Date
Lody
bd11b10298 shader: rewrite LOP3.LUT
shader: opt
2022-03-07 12:15:34 +08:00
33 changed files with 303 additions and 1175 deletions

View File

@@ -363,11 +363,7 @@ if(ENABLE_QT)
set(YUZU_QT_NO_CMAKE_SYSTEM_PATH "NO_CMAKE_SYSTEM_PATH")
endif()
if ((${CMAKE_SYSTEM_NAME} STREQUAL "Linux") AND YUZU_USE_BUNDLED_QT)
find_package(Qt5 ${QT_VERSION} REQUIRED COMPONENTS Widgets DBus ${QT_PREFIX_HINT} ${YUZU_QT_NO_CMAKE_SYSTEM_PATH})
else()
find_package(Qt5 ${QT_VERSION} REQUIRED COMPONENTS Widgets ${QT_PREFIX_HINT} ${YUZU_QT_NO_CMAKE_SYSTEM_PATH})
endif()
find_package(Qt5 ${QT_VERSION} REQUIRED COMPONENTS Widgets ${QT_PREFIX_HINT} ${YUZU_QT_NO_CMAKE_SYSTEM_PATH})
if (YUZU_USE_QT_WEB_ENGINE)
find_package(Qt5 COMPONENTS WebEngineCore WebEngineWidgets)
endif()

View File

@@ -57,11 +57,4 @@ requires std::is_integral_v<T>
return static_cast<T>(1ULL << ((8U * sizeof(T)) - std::countl_zero(value - 1U)));
}
template <size_t bit_index, typename T>
requires std::is_integral_v<T>
[[nodiscard]] constexpr bool Bit(const T value) {
static_assert(bit_index < BitSize<T>(), "bit_index must be smaller than size of T");
return ((value >> bit_index) & T(1)) == T(1);
}
} // namespace Common

View File

@@ -276,9 +276,9 @@ private:
ColorConsoleBackend color_console_backend{};
FileBackend file_backend;
std::jthread backend_thread;
MPSCQueue<Entry, true> message_queue{};
std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
std::jthread backend_thread;
};
} // namespace

View File

@@ -55,50 +55,22 @@ void AppendBuildInfo(FieldCollection& fc) {
void AppendCPUInfo(FieldCollection& fc) {
#ifdef ARCHITECTURE_x86_64
const auto& caps = Common::GetCPUCaps();
const auto add_field = [&fc](std::string_view field_name, const auto& field_value) {
fc.AddField(FieldType::UserSystem, field_name, field_value);
};
add_field("CPU_Model", caps.cpu_string);
add_field("CPU_BrandString", caps.brand_string);
add_field("CPU_Extension_x64_SSE", caps.sse);
add_field("CPU_Extension_x64_SSE2", caps.sse2);
add_field("CPU_Extension_x64_SSE3", caps.sse3);
add_field("CPU_Extension_x64_SSSE3", caps.ssse3);
add_field("CPU_Extension_x64_SSE41", caps.sse4_1);
add_field("CPU_Extension_x64_SSE42", caps.sse4_2);
add_field("CPU_Extension_x64_AVX", caps.avx);
add_field("CPU_Extension_x64_AVX_VNNI", caps.avx_vnni);
add_field("CPU_Extension_x64_AVX2", caps.avx2);
// Skylake-X/SP level AVX512, for compatibility with the previous telemetry field
add_field("CPU_Extension_x64_AVX512",
caps.avx512f && caps.avx512cd && caps.avx512vl && caps.avx512dq && caps.avx512bw);
add_field("CPU_Extension_x64_AVX512F", caps.avx512f);
add_field("CPU_Extension_x64_AVX512CD", caps.avx512cd);
add_field("CPU_Extension_x64_AVX512VL", caps.avx512vl);
add_field("CPU_Extension_x64_AVX512DQ", caps.avx512dq);
add_field("CPU_Extension_x64_AVX512BW", caps.avx512bw);
add_field("CPU_Extension_x64_AVX512BITALG", caps.avx512bitalg);
add_field("CPU_Extension_x64_AVX512VBMI", caps.avx512vbmi);
add_field("CPU_Extension_x64_AES", caps.aes);
add_field("CPU_Extension_x64_BMI1", caps.bmi1);
add_field("CPU_Extension_x64_BMI2", caps.bmi2);
add_field("CPU_Extension_x64_F16C", caps.f16c);
add_field("CPU_Extension_x64_FMA", caps.fma);
add_field("CPU_Extension_x64_FMA4", caps.fma4);
add_field("CPU_Extension_x64_GFNI", caps.gfni);
add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc);
add_field("CPU_Extension_x64_LZCNT", caps.lzcnt);
add_field("CPU_Extension_x64_MOVBE", caps.movbe);
add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq);
add_field("CPU_Extension_x64_POPCNT", caps.popcnt);
add_field("CPU_Extension_x64_SHA", caps.sha);
fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);
fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA4", Common::GetCPUCaps().fma4);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE", Common::GetCPUCaps().sse);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE2", Common::GetCPUCaps().sse2);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE3", Common::GetCPUCaps().sse3);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSSE3", Common::GetCPUCaps().ssse3);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE41", Common::GetCPUCaps().sse4_1);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE42", Common::GetCPUCaps().sse4_2);
#else
fc.AddField(FieldType::UserSystem, "CPU_Model", "Other");
#endif

View File

@@ -8,7 +8,6 @@
#include <map>
#include <memory>
#include <string>
#include <string_view>
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -56,8 +55,8 @@ class Field : public FieldInterface {
public:
YUZU_NON_COPYABLE(Field);
Field(FieldType type_, std::string_view name_, T value_)
: name(name_), type(type_), value(std::move(value_)) {}
Field(FieldType type_, std::string name_, T value_)
: name(std::move(name_)), type(type_), value(std::move(value_)) {}
~Field() override = default;
@@ -124,7 +123,7 @@ public:
* @param value Value for the field to add.
*/
template <typename T>
void AddField(FieldType type, std::string_view name, T value) {
void AddField(FieldType type, const char* name, T value) {
return AddField(std::make_unique<Field<T>>(type, name, std::move(value)));
}

View File

@@ -1,12 +1,8 @@
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator
// Project Licensed under GPLv2 or any later version Refer to the license.txt file included.
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <cstring>
#include <iterator>
#include <span>
#include <string_view>
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/x64/cpu_detect.h"
@@ -21,7 +17,7 @@
// clang-format on
#endif
static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) {
static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
#if defined(__DragonFly__) || defined(__FreeBSD__)
// Despite the name, this is just do_cpuid() with ECX as second input.
cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info);
@@ -34,7 +30,7 @@ static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) {
#endif
}
static inline void __cpuid(int info[4], u32 function_id) {
static inline void __cpuid(int info[4], int function_id) {
return __cpuidex(info, function_id, 0);
}
@@ -49,17 +45,6 @@ static inline u64 _xgetbv(u32 index) {
namespace Common {
CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) {
if (brand_string == "GenuineIntel") {
return Manufacturer::Intel;
} else if (brand_string == "AuthenticAMD") {
return Manufacturer::AMD;
} else if (brand_string == "HygonGenuine") {
return Manufacturer::Hygon;
}
return Manufacturer::Unknown;
}
// Detects the various CPU features
static CPUCaps Detect() {
CPUCaps caps = {};
@@ -68,74 +53,75 @@ static CPUCaps Detect() {
// yuzu at all anyway
int cpu_id[4];
memset(caps.brand_string, 0, sizeof(caps.brand_string));
// Detect CPU's CPUID capabilities and grab manufacturer string
// Detect CPU's CPUID capabilities and grab CPU string
__cpuid(cpu_id, 0x00000000);
const u32 max_std_fn = cpu_id[0]; // EAX
u32 max_std_fn = cpu_id[0]; // EAX
std::memset(caps.brand_string, 0, std::size(caps.brand_string));
std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(u32));
std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(u32));
std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(u32));
caps.manufacturer = CPUCaps::ParseManufacturer(caps.brand_string);
// Set reasonable default cpu string even if brand string not available
std::strncpy(caps.cpu_string, caps.brand_string, std::size(caps.brand_string));
std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
caps.manufacturer = Manufacturer::Intel;
else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
caps.manufacturer = Manufacturer::AMD;
else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
caps.manufacturer = Manufacturer::Hygon;
else
caps.manufacturer = Manufacturer::Unknown;
__cpuid(cpu_id, 0x80000000);
const u32 max_ex_fn = cpu_id[0];
u32 max_ex_fn = cpu_id[0];
// Set reasonable default brand string even if brand string not available
strcpy(caps.cpu_string, caps.brand_string);
// Detect family and other miscellaneous features
if (max_std_fn >= 1) {
__cpuid(cpu_id, 0x00000001);
caps.sse = Common::Bit<25>(cpu_id[3]);
caps.sse2 = Common::Bit<26>(cpu_id[3]);
caps.sse3 = Common::Bit<0>(cpu_id[2]);
caps.pclmulqdq = Common::Bit<1>(cpu_id[2]);
caps.ssse3 = Common::Bit<9>(cpu_id[2]);
caps.sse4_1 = Common::Bit<19>(cpu_id[2]);
caps.sse4_2 = Common::Bit<20>(cpu_id[2]);
caps.movbe = Common::Bit<22>(cpu_id[2]);
caps.popcnt = Common::Bit<23>(cpu_id[2]);
caps.aes = Common::Bit<25>(cpu_id[2]);
caps.f16c = Common::Bit<29>(cpu_id[2]);
if ((cpu_id[3] >> 25) & 1)
caps.sse = true;
if ((cpu_id[3] >> 26) & 1)
caps.sse2 = true;
if ((cpu_id[2]) & 1)
caps.sse3 = true;
if ((cpu_id[2] >> 9) & 1)
caps.ssse3 = true;
if ((cpu_id[2] >> 19) & 1)
caps.sse4_1 = true;
if ((cpu_id[2] >> 20) & 1)
caps.sse4_2 = true;
if ((cpu_id[2] >> 25) & 1)
caps.aes = true;
// AVX support requires 3 separate checks:
// - Is the AVX bit set in CPUID?
// - Is the XSAVE bit set in CPUID?
// - XGETBV result has the XCR bit set.
if (Common::Bit<28>(cpu_id[2]) && Common::Bit<27>(cpu_id[2])) {
if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) {
if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {
caps.avx = true;
if (Common::Bit<12>(cpu_id[2]))
if ((cpu_id[2] >> 12) & 1)
caps.fma = true;
}
}
if (max_std_fn >= 7) {
__cpuidex(cpu_id, 0x00000007, 0x00000000);
// Can't enable AVX{2,512} unless the XSAVE/XGETBV checks above passed
if (caps.avx) {
caps.avx2 = Common::Bit<5>(cpu_id[1]);
caps.avx512f = Common::Bit<16>(cpu_id[1]);
caps.avx512dq = Common::Bit<17>(cpu_id[1]);
caps.avx512cd = Common::Bit<28>(cpu_id[1]);
caps.avx512bw = Common::Bit<30>(cpu_id[1]);
caps.avx512vl = Common::Bit<31>(cpu_id[1]);
caps.avx512vbmi = Common::Bit<1>(cpu_id[2]);
caps.avx512bitalg = Common::Bit<12>(cpu_id[2]);
// Can't enable AVX2 unless the XSAVE/XGETBV checks above passed
if ((cpu_id[1] >> 5) & 1)
caps.avx2 = caps.avx;
if ((cpu_id[1] >> 3) & 1)
caps.bmi1 = true;
if ((cpu_id[1] >> 8) & 1)
caps.bmi2 = true;
// Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 &&
(cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) {
caps.avx512 = caps.avx2;
}
caps.bmi1 = Common::Bit<3>(cpu_id[1]);
caps.bmi2 = Common::Bit<8>(cpu_id[1]);
caps.sha = Common::Bit<29>(cpu_id[1]);
caps.gfni = Common::Bit<8>(cpu_id[2]);
__cpuidex(cpu_id, 0x00000007, 0x00000001);
caps.avx_vnni = caps.avx && Common::Bit<4>(cpu_id[0]);
}
}
@@ -152,13 +138,15 @@ static CPUCaps Detect() {
if (max_ex_fn >= 0x80000001) {
// Check for more features
__cpuid(cpu_id, 0x80000001);
caps.lzcnt = Common::Bit<5>(cpu_id[2]);
caps.fma4 = Common::Bit<16>(cpu_id[2]);
if ((cpu_id[2] >> 16) & 1)
caps.fma4 = true;
}
if (max_ex_fn >= 0x80000007) {
__cpuid(cpu_id, 0x80000007);
caps.invariant_tsc = Common::Bit<8>(cpu_id[3]);
if (cpu_id[3] & (1 << 8)) {
caps.invariant_tsc = true;
}
}
if (max_std_fn >= 0x16) {

View File

@@ -1,65 +1,42 @@
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator
// Project Project Licensed under GPLv2 or any later version Refer to the license.txt file included.
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string_view>
#include "common/common_types.h"
namespace Common {
enum class Manufacturer : u32 {
Intel = 0,
AMD = 1,
Hygon = 2,
Unknown = 3,
};
/// x86/x64 CPU capabilities that may be detected by this module
struct CPUCaps {
enum class Manufacturer : u8 {
Unknown = 0,
Intel = 1,
AMD = 2,
Hygon = 3,
};
static Manufacturer ParseManufacturer(std::string_view brand_string);
Manufacturer manufacturer;
char brand_string[13];
char cpu_string[48];
char cpu_string[0x21];
char brand_string[0x41];
bool sse;
bool sse2;
bool sse3;
bool ssse3;
bool sse4_1;
bool sse4_2;
bool lzcnt;
bool avx;
bool avx2;
bool avx512;
bool bmi1;
bool bmi2;
bool fma;
bool fma4;
bool aes;
bool invariant_tsc;
u32 base_frequency;
u32 max_frequency;
u32 bus_frequency;
bool sse : 1;
bool sse2 : 1;
bool sse3 : 1;
bool ssse3 : 1;
bool sse4_1 : 1;
bool sse4_2 : 1;
bool avx : 1;
bool avx_vnni : 1;
bool avx2 : 1;
bool avx512f : 1;
bool avx512dq : 1;
bool avx512cd : 1;
bool avx512bw : 1;
bool avx512vl : 1;
bool avx512vbmi : 1;
bool avx512bitalg : 1;
bool aes : 1;
bool bmi1 : 1;
bool bmi2 : 1;
bool f16c : 1;
bool fma : 1;
bool fma4 : 1;
bool gfni : 1;
bool invariant_tsc : 1;
bool lzcnt : 1;
bool movbe : 1;
bool pclmulqdq : 1;
bool popcnt : 1;
bool sha : 1;
};
/**

View File

@@ -148,8 +148,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
config.wall_clock_cntpct = uses_wall_clock;
// Code cache size
config.code_cache_size = 128_MiB;
config.far_code_offset = 100_MiB;
config.code_cache_size = 512_MiB;
config.far_code_offset = 400_MiB;
// Safe optimizations
if (Settings::values.cpu_debug_mode) {

View File

@@ -208,8 +208,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.wall_clock_cntpct = uses_wall_clock;
// Code cache size
config.code_cache_size = 128_MiB;
config.far_code_offset = 100_MiB;
config.code_cache_size = 512_MiB;
config.far_code_offset = 400_MiB;
// Safe optimizations
if (Settings::values.cpu_debug_mode) {

View File

@@ -326,9 +326,7 @@ struct System::Impl {
is_powered_on = false;
exit_lock = false;
if (gpu_core != nullptr) {
gpu_core->NotifyShutdown();
}
gpu_core->NotifyShutdown();
services.reset();
service_manager.reset();

View File

@@ -42,20 +42,11 @@ public:
context.MakeCurrent();
}
~Scoped() {
if (active) {
context.DoneCurrent();
}
}
/// In the event that context was destroyed before the Scoped is destroyed, this provides a
/// mechanism to prevent calling a destroyed object's method during the deconstructor
void Cancel() {
active = false;
context.DoneCurrent();
}
private:
GraphicsContext& context;
bool active{true};
};
/// Calls MakeCurrent on the context and calls DoneCurrent when the scope for the returned value

View File

@@ -285,141 +285,72 @@ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemory
return ResultSuccess;
}
ResultCode KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
// Validate the mapping request.
R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
ResultInvalidMemoryRegion);
// Lock the table.
ResultCode KPageTable::MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
KScopedLightLock lk(general_lock);
// Verify that the source memory is normal heap.
KMemoryState src_state{};
KMemoryPermission src_perm{};
std::size_t num_src_allocator_blocks{};
R_TRY(this->CheckMemoryState(&src_state, &src_perm, nullptr, &num_src_allocator_blocks,
src_address, size, KMemoryState::All, KMemoryState::Normal,
KMemoryPermission::All, KMemoryPermission::UserReadWrite,
KMemoryAttribute::All, KMemoryAttribute::None));
const std::size_t num_pages{size / PageSize};
// Verify that the destination memory is unmapped.
std::size_t num_dst_allocator_blocks{};
R_TRY(this->CheckMemoryState(&num_dst_allocator_blocks, dst_address, size, KMemoryState::All,
KMemoryState::Free, KMemoryPermission::None,
KMemoryPermission::None, KMemoryAttribute::None,
KMemoryAttribute::None));
KMemoryState state{};
KMemoryPermission perm{};
CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, nullptr, src_addr, size,
KMemoryState::All, KMemoryState::Normal, KMemoryPermission::All,
KMemoryPermission::UserReadWrite, KMemoryAttribute::Mask,
KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
// Map the code memory.
{
// Determine the number of pages being operated on.
const std::size_t num_pages = size / PageSize;
// Create page groups for the memory being mapped.
KPageLinkedList pg;
AddRegionToPages(src_address, num_pages, pg);
// Reprotect the source as kernel-read/not mapped.
const auto new_perm = static_cast<KMemoryPermission>(KMemoryPermission::KernelRead |
KMemoryPermission::NotMapped);
R_TRY(Operate(src_address, num_pages, new_perm, OperationType::ChangePermissions));
// Ensure that we unprotect the source pages on failure.
auto unprot_guard = SCOPE_GUARD({
ASSERT(this->Operate(src_address, num_pages, src_perm, OperationType::ChangePermissions)
.IsSuccess());
});
// Map the alias pages.
R_TRY(MapPages(dst_address, pg, new_perm));
// We successfully mapped the alias pages, so we don't need to unprotect the src pages on
// failure.
unprot_guard.Cancel();
// Apply the memory block updates.
block_manager->Update(src_address, num_pages, src_state, new_perm,
KMemoryAttribute::Locked);
block_manager->Update(dst_address, num_pages, KMemoryState::AliasCode, new_perm,
KMemoryAttribute::None);
if (IsRegionMapped(dst_addr, size)) {
return ResultInvalidCurrentMemory;
}
KPageLinkedList page_linked_list;
AddRegionToPages(src_addr, num_pages, page_linked_list);
{
auto block_guard = detail::ScopeExit(
[&] { Operate(src_addr, num_pages, perm, OperationType::ChangePermissions); });
CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::None,
OperationType::ChangePermissions));
CASCADE_CODE(MapPages(dst_addr, page_linked_list, KMemoryPermission::None));
block_guard.Cancel();
}
block_manager->Update(src_addr, num_pages, state, KMemoryPermission::None,
KMemoryAttribute::Locked);
block_manager->Update(dst_addr, num_pages, KMemoryState::AliasCode);
return ResultSuccess;
}
ResultCode KPageTable::UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
// Validate the mapping request.
R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
ResultInvalidMemoryRegion);
// Lock the table.
ResultCode KPageTable::UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
KScopedLightLock lk(general_lock);
// Verify that the source memory is locked normal heap.
std::size_t num_src_allocator_blocks{};
R_TRY(this->CheckMemoryState(std::addressof(num_src_allocator_blocks), src_address, size,
KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None,
KMemoryPermission::None, KMemoryAttribute::All,
KMemoryAttribute::Locked));
if (!size) {
return ResultSuccess;
}
// Verify that the destination memory is aliasable code.
std::size_t num_dst_allocator_blocks{};
R_TRY(this->CheckMemoryStateContiguous(
std::addressof(num_dst_allocator_blocks), dst_address, size, KMemoryState::FlagCanCodeAlias,
const std::size_t num_pages{size / PageSize};
CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, nullptr, src_addr, size,
KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None,
KMemoryPermission::None, KMemoryAttribute::Mask,
KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
KMemoryState state{};
CASCADE_CODE(CheckMemoryState(
&state, nullptr, nullptr, nullptr, dst_addr, PageSize, KMemoryState::FlagCanCodeAlias,
KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None,
KMemoryAttribute::All, KMemoryAttribute::None));
KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
CASCADE_CODE(CheckMemoryState(dst_addr, size, KMemoryState::All, state, KMemoryPermission::None,
KMemoryPermission::None, KMemoryAttribute::Mask,
KMemoryAttribute::None));
CASCADE_CODE(Operate(dst_addr, num_pages, KMemoryPermission::None, OperationType::Unmap));
// Determine whether any pages being unmapped are code.
bool any_code_pages = false;
{
KMemoryBlockManager::const_iterator it = block_manager->FindIterator(dst_address);
while (true) {
// Get the memory info.
const KMemoryInfo info = it->GetMemoryInfo();
block_manager->Update(dst_addr, num_pages, KMemoryState::Free);
block_manager->Update(src_addr, num_pages, KMemoryState::Normal,
KMemoryPermission::UserReadWrite);
// Check if the memory has code flag.
if ((info.GetState() & KMemoryState::FlagCode) != KMemoryState::None) {
any_code_pages = true;
break;
}
// Check if we're done.
if (dst_address + size - 1 <= info.GetLastAddress()) {
break;
}
// Advance.
++it;
}
}
// Ensure that we maintain the instruction cache.
bool reprotected_pages = false;
SCOPE_EXIT({
if (reprotected_pages && any_code_pages) {
system.InvalidateCpuInstructionCacheRange(dst_address, size);
}
});
// Unmap.
{
// Determine the number of pages being operated on.
const std::size_t num_pages = size / PageSize;
// Unmap the aliased copy of the pages.
R_TRY(Operate(dst_address, num_pages, KMemoryPermission::None, OperationType::Unmap));
// Try to set the permissions for the source pages back to what they should be.
R_TRY(Operate(src_address, num_pages, KMemoryPermission::UserReadWrite,
OperationType::ChangePermissions));
// Apply the memory block updates.
block_manager->Update(dst_address, num_pages, KMemoryState::None);
block_manager->Update(src_address, num_pages, KMemoryState::Normal,
KMemoryPermission::UserReadWrite);
// Note that we reprotected pages.
reprotected_pages = true;
}
system.InvalidateCpuInstructionCacheRange(dst_addr, size);
return ResultSuccess;
}

View File

@@ -36,8 +36,8 @@ public:
KMemoryManager::Pool pool);
ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, KMemoryState state,
KMemoryPermission perm);
ResultCode MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
ResultCode UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
ResultCode MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
ResultCode UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
ResultCode UnmapProcessMemory(VAddr dst_addr, std::size_t size, KPageTable& src_page_table,
VAddr src_addr);
ResultCode MapPhysicalMemory(VAddr addr, std::size_t size);
@@ -253,9 +253,7 @@ public:
constexpr bool IsInsideASLRRegion(VAddr address, std::size_t size) const {
return !IsOutsideASLRRegion(address, size);
}
constexpr std::size_t GetNumGuardPages() const {
return IsKernel() ? 1 : 4;
}
PAddr GetPhysicalAddr(VAddr addr) const {
const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits];
ASSERT(backing_addr);
@@ -277,6 +275,10 @@ private:
return is_aslr_enabled;
}
constexpr std::size_t GetNumGuardPages() const {
return IsKernel() ? 1 : 4;
}
constexpr bool ContainsPages(VAddr addr, std::size_t num_pages) const {
return (address_space_start <= addr) &&
(num_pages <= (address_space_end - address_space_start) / PageSize) &&

View File

@@ -288,7 +288,7 @@ public:
}
bool ValidateRegionForMap(Kernel::KPageTable& page_table, VAddr start, std::size_t size) const {
const std::size_t padding_size{page_table.GetNumGuardPages() * Kernel::PageSize};
constexpr std::size_t padding_size{4 * Kernel::PageSize};
const auto start_info{page_table.QueryInfo(start - 1)};
if (start_info.state != Kernel::KMemoryState::Free) {
@@ -308,69 +308,31 @@ public:
return (start + size + padding_size) <= (end_info.GetAddress() + end_info.GetSize());
}
ResultCode GetAvailableMapRegion(Kernel::KPageTable& page_table, u64 size, VAddr& out_addr) {
size = Common::AlignUp(size, Kernel::PageSize);
size += page_table.GetNumGuardPages() * Kernel::PageSize * 4;
const auto is_region_available = [&](VAddr addr) {
const auto end_addr = addr + size;
while (addr < end_addr) {
if (system.Memory().IsValidVirtualAddress(addr)) {
return false;
}
if (!page_table.IsInsideAddressSpace(out_addr, size)) {
return false;
}
if (page_table.IsInsideHeapRegion(out_addr, size)) {
return false;
}
if (page_table.IsInsideAliasRegion(out_addr, size)) {
return false;
}
addr += Kernel::PageSize;
}
return true;
};
bool succeeded = false;
const auto map_region_end =
page_table.GetAliasCodeRegionStart() + page_table.GetAliasCodeRegionSize();
while (current_map_addr < map_region_end) {
if (is_region_available(current_map_addr)) {
succeeded = true;
break;
}
current_map_addr += 0x100000;
}
if (!succeeded) {
UNREACHABLE_MSG("Out of address space!");
return Kernel::ResultOutOfMemory;
}
out_addr = current_map_addr;
current_map_addr += size;
return ResultSuccess;
VAddr GetRandomMapRegion(const Kernel::KPageTable& page_table, std::size_t size) const {
VAddr addr{};
const std::size_t end_pages{(page_table.GetAliasCodeRegionSize() - size) >>
Kernel::PageBits};
do {
addr = page_table.GetAliasCodeRegionStart() +
(Kernel::KSystemControl::GenerateRandomRange(0, end_pages) << Kernel::PageBits);
} while (!page_table.IsInsideAddressSpace(addr, size) ||
page_table.IsInsideHeapRegion(addr, size) ||
page_table.IsInsideAliasRegion(addr, size));
return addr;
}
ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr base_addr, u64 size) {
auto& page_table{process->PageTable()};
VAddr addr{};
ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr baseAddress,
u64 size) const {
for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) {
R_TRY(GetAvailableMapRegion(page_table, size, addr));
auto& page_table{process->PageTable()};
const VAddr addr{GetRandomMapRegion(page_table, size)};
const ResultCode result{page_table.MapCodeMemory(addr, baseAddress, size)};
const ResultCode result{page_table.MapCodeMemory(addr, base_addr, size)};
if (result == Kernel::ResultInvalidCurrentMemory) {
continue;
}
R_TRY(result);
CASCADE_CODE(result);
if (ValidateRegionForMap(page_table, addr, size)) {
return addr;
@@ -381,7 +343,7 @@ public:
}
ResultVal<VAddr> MapNro(Kernel::KProcess* process, VAddr nro_addr, std::size_t nro_size,
VAddr bss_addr, std::size_t bss_size, std::size_t size) {
VAddr bss_addr, std::size_t bss_size, std::size_t size) const {
for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) {
auto& page_table{process->PageTable()};
VAddr addr{};
@@ -635,7 +597,6 @@ public:
LOG_WARNING(Service_LDR, "(STUBBED) called");
initialized = true;
current_map_addr = system.CurrentProcess()->PageTable().GetAliasCodeRegionStart();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
@@ -646,7 +607,6 @@ private:
std::map<VAddr, NROInfo> nro;
std::map<VAddr, std::vector<SHA256Hash>> nrr;
VAddr current_map_addr{};
bool IsValidNROHash(const SHA256Hash& hash) const {
return std::any_of(nrr.begin(), nrr.end(), [&hash](const auto& p) {

View File

@@ -22,7 +22,7 @@ constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS =
struct RescalingLayout {
alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures;
alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images;
u32 down_factor;
alignas(16) u32 down_factor;
};
constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures);
constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor);

View File

@@ -2,6 +2,30 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// This files contains code from Ryujinx
// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx
// The sections using code from Ryujinx are marked with a link to the original version
// MIT License
//
// Copyright (c) Ryujinx Team and Contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
// associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
@@ -13,535 +37,87 @@ namespace {
// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
u64 ttbl) {
switch (ttbl) {
// generated code, do not edit manually
case 0:
return ir.Imm32(0);
case 1:
return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseOr(b, c)));
case 2:
return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
case 3:
return ir.BitwiseNot(ir.BitwiseOr(a, b));
case 4:
return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
case 5:
return ir.BitwiseNot(ir.BitwiseOr(a, c));
case 6:
return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
case 7:
return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseAnd(b, c)));
case 8:
return ir.BitwiseAnd(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
case 9:
return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseXor(b, c)));
case 10:
return ir.BitwiseAnd(c, ir.BitwiseNot(a));
case 11:
return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(c, ir.BitwiseNot(b)));
case 12:
return ir.BitwiseAnd(b, ir.BitwiseNot(a));
case 13:
return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, ir.BitwiseNot(c)));
case 14:
return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
case 15:
return ir.BitwiseNot(a);
case 16:
return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
case 17:
return ir.BitwiseNot(ir.BitwiseOr(b, c));
case 18:
return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
case 19:
return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseAnd(a, c)));
case 20:
return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
case 21:
return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
case 22:
return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
case 23:
return ir.BitwiseXor(ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)),
ir.BitwiseNot(a));
case 24:
return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
case 25:
return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c)));
case 26:
return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
case 27:
return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
case 28:
return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
case 29:
return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
case 30:
return ir.BitwiseXor(a, ir.BitwiseOr(b, c));
case 31:
return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)));
case 32:
return ir.BitwiseAnd(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
case 33:
return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
case 34:
return ir.BitwiseAnd(c, ir.BitwiseNot(b));
case 35:
return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
case 36:
return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(b, c));
case 37:
return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c)));
case 38:
return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
case 39:
return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
case 40:
return ir.BitwiseAnd(c, ir.BitwiseXor(a, b));
case 41:
return ir.BitwiseXor(ir.BitwiseOr(a, b),
ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)));
case 42:
return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
case 43:
return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)),
ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
case 44:
return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b));
case 45:
return ir.BitwiseXor(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
case 46:
return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(b, c));
case 47:
return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(b)), ir.BitwiseNot(a));
case 48:
return ir.BitwiseAnd(a, ir.BitwiseNot(b));
case 49:
return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, ir.BitwiseNot(c)));
case 50:
return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
case 51:
return ir.BitwiseNot(b);
case 52:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
case 53:
return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
case 54:
return ir.BitwiseXor(b, ir.BitwiseOr(a, c));
case 55:
return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)));
case 56:
return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b));
case 57:
return ir.BitwiseXor(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
case 58:
return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(a, c));
case 59:
return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseNot(b));
case 60:
return ir.BitwiseXor(a, b);
case 61:
return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, c)), ir.BitwiseXor(a, b));
case 62:
return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, b));
case 63:
return ir.BitwiseNot(ir.BitwiseAnd(a, b));
case 64:
return ir.BitwiseAnd(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
case 65:
return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
case 66:
return ir.BitwiseAnd(ir.BitwiseXor(a, c), ir.BitwiseXor(b, c));
case 67:
return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b)));
case 68:
return ir.BitwiseAnd(b, ir.BitwiseNot(c));
case 69:
return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
case 70:
return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
case 71:
return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
case 72:
return ir.BitwiseAnd(b, ir.BitwiseXor(a, c));
case 73:
return ir.BitwiseXor(ir.BitwiseOr(a, c),
ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)));
case 74:
return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c));
case 75:
return ir.BitwiseXor(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
case 76:
return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
case 77:
return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)),
ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
case 78:
return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(b, c));
case 79:
return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(c)), ir.BitwiseNot(a));
case 80:
return ir.BitwiseAnd(a, ir.BitwiseNot(c));
case 81:
return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, ir.BitwiseNot(b)));
case 82:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
case 83:
return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
case 84:
return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
case 85:
return ir.BitwiseNot(c);
case 86:
return ir.BitwiseXor(c, ir.BitwiseOr(a, b));
case 87:
return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)));
case 88:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c));
case 89:
return ir.BitwiseXor(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
case 90:
return ir.BitwiseXor(a, c);
case 91:
return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(a, c));
case 92:
return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(a, b));
case 93:
return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseNot(c));
case 94:
return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, c));
case 95:
return ir.BitwiseNot(ir.BitwiseAnd(a, c));
case 96:
return ir.BitwiseAnd(a, ir.BitwiseXor(b, c));
case 97:
return ir.BitwiseXor(ir.BitwiseOr(b, c),
ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)));
case 98:
return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c));
case 99:
return ir.BitwiseXor(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
case 100:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c));
case 101:
return ir.BitwiseXor(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
case 102:
return ir.BitwiseXor(b, c);
case 103:
return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(b, c));
case 104:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(c, ir.BitwiseAnd(a, b)));
case 105:
return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
case 106:
return ir.BitwiseXor(c, ir.BitwiseAnd(a, b));
case 107:
return ir.BitwiseXor(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)),
ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 108:
return ir.BitwiseXor(b, ir.BitwiseAnd(a, c));
case 109:
return ir.BitwiseXor(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)),
ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 110:
return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
case 111:
return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
case 112:
return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
case 113:
return ir.BitwiseXor(ir.BitwiseOr(b, ir.BitwiseNot(a)),
ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
case 114:
return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, c));
case 115:
return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseNot(b));
case 116:
return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, b));
case 117:
return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseNot(c));
case 118:
return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
case 119:
return ir.BitwiseNot(ir.BitwiseAnd(b, c));
case 120:
return ir.BitwiseXor(a, ir.BitwiseAnd(b, c));
case 121:
return ir.BitwiseXor(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)),
ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 122:
return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
case 123:
return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
case 124:
return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
case 125:
return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
case 126:
return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
case 127:
return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseAnd(b, c)));
case 128:
return ir.BitwiseAnd(a, ir.BitwiseAnd(b, c));
case 129:
return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
case 130:
return ir.BitwiseAnd(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 131:
return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 132:
return ir.BitwiseAnd(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 133:
return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 134:
return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
case 135:
return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
case 136:
return ir.BitwiseAnd(b, c);
case 137:
return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 138:
return ir.BitwiseAnd(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
case 139:
return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
case 140:
return ir.BitwiseAnd(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
case 141:
return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, c)));
case 142:
return ir.BitwiseXor(a, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
case 143:
return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
case 144:
return ir.BitwiseAnd(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 145:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 146:
return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
case 147:
return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
case 148:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
case 149:
return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
case 150:
return ir.BitwiseXor(a, ir.BitwiseXor(b, c));
case 151:
return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)),
ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
case 152:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 153:
return ir.BitwiseXor(b, ir.BitwiseNot(c));
case 154:
return ir.BitwiseXor(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
case 155:
return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c)));
case 156:
return ir.BitwiseXor(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
case 157:
return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c)));
case 158:
return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseOr(b, c)));
case 159:
return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseXor(b, c)));
case 160:
return ir.BitwiseAnd(a, c);
case 161:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 162:
return ir.BitwiseAnd(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
case 163:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
case 164:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 165:
return ir.BitwiseXor(a, ir.BitwiseNot(c));
case 166:
return ir.BitwiseXor(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
case 167:
return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c)));
case 168:
return ir.BitwiseAnd(c, ir.BitwiseOr(a, b));
case 169:
return ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
case 170:
return c;
case 171:
return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
case 172:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
case 173:
return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 174:
return ir.BitwiseOr(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
case 175:
return ir.BitwiseOr(c, ir.BitwiseNot(a));
case 176:
return ir.BitwiseAnd(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
case 177:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(b, c)));
case 178:
return ir.BitwiseXor(b, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
case 179:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
case 180:
return ir.BitwiseXor(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
case 181:
return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c)));
case 182:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseOr(a, c)));
case 183:
return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseXor(a, c)));
case 184:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
case 185:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 186:
return ir.BitwiseOr(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
case 187:
return ir.BitwiseOr(c, ir.BitwiseNot(b));
case 188:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b));
case 189:
return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 190:
return ir.BitwiseOr(c, ir.BitwiseXor(a, b));
case 191:
return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
case 192:
return ir.BitwiseAnd(a, b);
case 193:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 194:
return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 195:
return ir.BitwiseXor(a, ir.BitwiseNot(b));
case 196:
return ir.BitwiseAnd(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
case 197:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(a, c)));
case 198:
return ir.BitwiseXor(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
case 199:
return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b)));
case 200:
return ir.BitwiseAnd(b, ir.BitwiseOr(a, c));
case 201:
return ir.BitwiseXor(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
case 202:
return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
case 203:
return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 204:
return b;
case 205:
return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
case 206:
return ir.BitwiseOr(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
case 207:
return ir.BitwiseOr(b, ir.BitwiseNot(a));
case 208:
return ir.BitwiseAnd(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
case 209:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(b, c)));
case 210:
return ir.BitwiseXor(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
case 211:
return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b)));
case 212:
return ir.BitwiseXor(c, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
case 213:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
case 214:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(c, ir.BitwiseOr(a, b)));
case 215:
return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseXor(a, b)));
case 216:
return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
case 217:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 218:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c));
case 219:
return ir.BitwiseOr(ir.BitwiseXor(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 220:
return ir.BitwiseOr(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
case 221:
return ir.BitwiseOr(b, ir.BitwiseNot(c));
case 222:
return ir.BitwiseOr(b, ir.BitwiseXor(a, c));
case 223:
return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
case 224:
return ir.BitwiseAnd(a, ir.BitwiseOr(b, c));
case 225:
return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
case 226:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
case 227:
return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 228:
return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
case 229:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 230:
return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c));
case 231:
return ir.BitwiseOr(ir.BitwiseXor(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
case 232:
return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
case 233:
return ir.BitwiseOr(ir.BitwiseAnd(a, b),
ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b)));
case 234:
return ir.BitwiseOr(c, ir.BitwiseAnd(a, b));
case 235:
return ir.BitwiseOr(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
case 236:
return ir.BitwiseOr(b, ir.BitwiseAnd(a, c));
case 237:
return ir.BitwiseOr(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
case 238:
return ir.BitwiseOr(b, c);
case 239:
return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
case 240:
return a;
case 241:
return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
case 242:
return ir.BitwiseOr(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
case 243:
return ir.BitwiseOr(a, ir.BitwiseNot(b));
case 244:
return ir.BitwiseOr(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
case 245:
return ir.BitwiseOr(a, ir.BitwiseNot(c));
case 246:
return ir.BitwiseOr(a, ir.BitwiseXor(b, c));
case 247:
return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
case 248:
return ir.BitwiseOr(a, ir.BitwiseAnd(b, c));
case 249:
return ir.BitwiseOr(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
case 250:
return ir.BitwiseOr(a, c);
case 251:
return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
case 252:
return ir.BitwiseOr(a, b);
case 253:
return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
case 254:
return ir.BitwiseOr(a, ir.BitwiseOr(b, c));
case 255:
return ir.Imm32(0xFFFFFFFF);
// end of generated code
std::optional<IR::U32> value;
// Encode into gray code.
u32 map = ttbl & 1;
map |= ((ttbl >> 1) & 1) << 4;
map |= ((ttbl >> 2) & 1) << 1;
map |= ((ttbl >> 3) & 1) << 5;
map |= ((ttbl >> 4) & 1) << 3;
map |= ((ttbl >> 5) & 1) << 7;
map |= ((ttbl >> 6) & 1) << 2;
map |= ((ttbl >> 7) & 1) << 6;
u32 visited = 0;
for (u32 index = 0; index < 8 && visited != 0xff; index++) {
if ((map & (1 << index)) == 0) {
continue;
}
const auto RotateLeft4 = [](u32 value, u32 shift) {
return ((value << shift) | (value >> (4 - shift))) & 0xf;
};
u32 mask = 0;
for (u32 size = 4; size != 0; size >>= 1) {
mask = RotateLeft4((1 << size) - 1, index & 3) << (index & 4);
if ((map & mask) == mask) {
break;
}
}
// The mask should wrap, if we are on the high row, shift to low etc.
const u32 mask2 = (index & 4) != 0 ? mask >> 4 : mask << 4;
if ((map & mask2) == mask2) {
mask |= mask2;
}
if ((mask & visited) == mask) {
continue;
}
const bool not_a = (mask & 0x33) != 0;
const bool not_b = (mask & 0x99) != 0;
const bool not_c = (mask & 0x0f) != 0;
const bool a_changes = (mask & 0xcc) != 0 && not_a;
const bool b_changes = (mask & 0x66) != 0 && not_b;
const bool c_changes = (mask & 0xf0) != 0 && not_c;
std::optional<IR::U32> local_value;
const auto And = [&](const IR::U32& source, bool inverted) {
IR::U32 result = inverted ? ir.BitwiseNot(source) : source;
if (local_value) {
local_value = ir.BitwiseAnd(*local_value, result);
} else {
local_value = result;
}
};
if (!a_changes) {
And(a, not_a);
}
if (!b_changes) {
And(b, not_b);
}
if (!c_changes) {
And(c, not_c);
}
if (value) {
value = ir.BitwiseOr(*value, *local_value);
} else {
value = local_value;
}
visited |= mask;
}
throw NotImplementedException("LOP3 with out of range ttbl");
return *value;
}
IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {

View File

@@ -1,92 +0,0 @@
# Copyright © 2022 degasus <markus@selfnet.de>
# This work is free. You can redistribute it and/or modify it under the
# terms of the Do What The Fuck You Want To Public License, Version 2,
# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
from itertools import product

# The primitive IR instructions we may combine:
# format-string template -> (arity, cost, python evaluator over 8-bit truth tables)
OPS = {
    'ir.BitwiseAnd({}, {})': (2, 1, lambda a, b: a & b),
    'ir.BitwiseOr({}, {})': (2, 1, lambda a, b: a | b),
    'ir.BitwiseXor({}, {})': (2, 1, lambda a, b: a ^ b),
    # Only a tiny cost, as NOT can often be inlined into other instructions
    'ir.BitwiseNot({})': (1, 0.1, lambda a: (~a) & 255),
}

# Database mapping truth table -> best known (instruction, count, latency, costs)
optimized_calls = {}
def cmp(lhs, rhs):
    """Decide whether candidate `rhs` should replace entry `lhs`.

    Both are (instruction, count, latency, costs) tuples; `lhs` may be None
    when there is no entry yet. Lower cost wins; ties are broken by shorter
    instruction string, then lexicographic order, so output is deterministic.
    Returns True when `rhs` is the better entry.
    """
    if lhs is None:  # new entry
        return True
    if lhs[3] > rhs[3]:  # costs
        return True
    if lhs[3] < rhs[3]:  # costs
        return False
    if len(lhs[0]) > len(rhs[0]):  # string len
        return True
    if len(lhs[0]) < len(rhs[0]):  # string len
        return False
    if lhs[0] > rhs[0]:  # string sorting
        return True
    if lhs[0] < rhs[0]:  # string sorting
        return False
    assert lhs == rhs, "redundant instruction, bug in brute force"
    return False
def register(imm, instruction, count, latency):
    """Record `instruction` as a candidate implementation of truth table `imm`.

    Cost is the sum of instruction count and latency; the entry is stored only
    if it beats (per cmp) the current best in `optimized_calls`.
    Returns True when the database was updated.
    """
    # Use the sum of instruction count and latency as costs to evaluate which combination is best
    costs = count + latency
    old = optimized_calls.get(imm, None)
    new = (instruction, count, latency, costs)
    # Update if new or better
    if cmp(old, new):
        optimized_calls[imm] = new
        return True
    return False
# Seed the database with the zero-cost building blocks.
# Constants: 0, 1 (for free)
register(0, 'ir.Imm32(0)', 0, 0)
register(255, 'ir.Imm32(0xFFFFFFFF)', 0, 0)

# Inputs: a, b, c (for free); their 8-bit truth tables
ta = 0xF0
tb = 0xCC
tc = 0xAA
inputs = {
    ta: 'a',
    tb: 'b',
    tc: 'c',
}
for imm, instruction in inputs.items():
    register(imm, instruction, 0, 0)
    # Slightly cheaper NOT on raw inputs, so inverted inputs are preferred
    # over inverting a computed intermediate when costs otherwise tie.
    register((~imm) & 255, 'ir.BitwiseNot({})'.format(instruction), 0.099, 0.099)
# Try to combine two values from the db with an instruction.
# If it is better than the old method, update it. Iterate to a fixed point.
while True:
    registered = 0
    calls_copy = optimized_calls.copy()
    for OP, (argc, cost, f) in OPS.items():
        for args in product(calls_copy.items(), repeat=argc):
            # unpack (transpose) the (imm, (instruction, count, latency, costs)) pairs
            imm = [arg[0] for arg in args]
            value = [arg[1][0] for arg in args]
            count = [arg[1][1] for arg in args]
            latency = [arg[1][2] for arg in args]
            registered += register(
                f(*imm),
                OP.format(*value),
                sum(count) + cost,
                max(latency) + cost)
    if registered == 0:
        # No update at all? So terminate
        break

# Hacky output. Please improve me to output valid C++ instead.
# NOTE(review): indentation of the template matches the generated switch in
# the C++ translator (4 spaces for `case`, 8 for `return`) — confirm on use.
s = """    case {imm}:
        return {op};"""
for imm in range(256):
    print(s.format(imm=imm, op=optimized_calls[imm][0]))

View File

@@ -212,11 +212,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
}
Optimization::SsaRewritePass(program);
Optimization::ConstantPropagationPass(program);
Optimization::GlobalMemoryToStorageBufferPass(program);
Optimization::TexturePass(env, program);
Optimization::ConstantPropagationPass(program);
if (Settings::values.resolution_info.active) {
Optimization::RescalingPass(program);
}

View File

@@ -334,8 +334,7 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
/// Tries to track the storage buffer address used by a global memory instruction
std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
if (inst->GetOpcode() != IR::Opcode::GetCbufU32 &&
inst->GetOpcode() != IR::Opcode::GetCbufU32x2) {
if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
return std::nullopt;
}
const IR::Value index{inst->Arg(0)};

View File

@@ -183,31 +183,6 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s
}
}
void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
size_t index) {
const IR::Value composite{inst.Arg(index)};
if (composite.IsEmpty()) {
return;
}
const auto info{inst.Flags<IR::TextureInstInfo>()};
const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})};
const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})};
switch (info.type) {
case TextureType::ColorArray2D:
case TextureType::Color2D:
inst.SetArg(index, ir.CompositeConstruct(x, y));
break;
case TextureType::Color1D:
case TextureType::ColorArray1D:
case TextureType::Color3D:
case TextureType::ColorCube:
case TextureType::ColorArrayCube:
case TextureType::Buffer:
// Nothing to patch here
break;
}
}
void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const IR::Value coord{inst.Arg(1)};
@@ -245,7 +220,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
SubScaleCoord(ir, inst, is_scaled);
// Scale ImageFetch offset
ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
ScaleIntegerComposite(ir, inst, is_scaled, 2);
}
void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
@@ -267,7 +242,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
ScaleIntegerComposite(ir, inst, is_scaled, 1);
// Scale ImageFetch offset
ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
ScaleIntegerComposite(ir, inst, is_scaled, 2);
}
void PatchImageRead(IR::Block& block, IR::Inst& inst) {

View File

@@ -7,7 +7,6 @@
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
@@ -209,14 +208,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return ProcessCBBind(4);
case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
return DrawArrays();
case MAXWELL3D_REG_INDEX(small_index):
regs.index_array.count = regs.small_index.count;
regs.index_array.first = regs.small_index.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
return DrawArrays();
case MAXWELL3D_REG_INDEX(topology_override):
use_topology_override = true;
return;
case MAXWELL3D_REG_INDEX(clear_buffers):
return ProcessClearBuffers();
case MAXWELL3D_REG_INDEX(query.query_get):
@@ -369,35 +360,6 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
}
}
void Maxwell3D::ProcessTopologyOverride() {
using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
PrimitiveTopology topology{};
switch (regs.topology_override) {
case PrimitiveTopologyOverride::None:
topology = regs.draw.topology;
break;
case PrimitiveTopologyOverride::Points:
topology = PrimitiveTopology::Points;
break;
case PrimitiveTopologyOverride::Lines:
topology = PrimitiveTopology::Lines;
break;
case PrimitiveTopologyOverride::LineStrip:
topology = PrimitiveTopology::LineStrip;
break;
default:
topology = static_cast<PrimitiveTopology>(regs.topology_override);
break;
}
if (use_topology_override) {
regs.draw.topology.Assign(topology);
}
}
void Maxwell3D::FlushMMEInlineDraw() {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
regs.vertex_buffer.count);
@@ -408,8 +370,6 @@ void Maxwell3D::FlushMMEInlineDraw() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
ProcessTopologyOverride();
const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
if (ShouldExecute()) {
rasterizer->Draw(is_indexed, true);
@@ -569,8 +529,6 @@ void Maxwell3D::DrawArrays() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
ProcessTopologyOverride();
if (regs.draw.instance_next) {
// Increment the current instance *before* drawing.
state.current_instance += 1;

View File

@@ -367,22 +367,6 @@ public:
Patches = 0xe,
};
// Constants as from NVC0_3D_UNK1970_D3D
// https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598
enum class PrimitiveTopologyOverride : u32 {
None = 0x0,
Points = 0x1,
Lines = 0x2,
LineStrip = 0x3,
Triangles = 0x4,
TriangleStrip = 0x5,
LinesAdjacency = 0xa,
LineStripAdjacency = 0xb,
TrianglesAdjacency = 0xc,
TriangleStripAdjacency = 0xd,
Patches = 0xe,
};
enum class IndexFormat : u32 {
UnsignedByte = 0x0,
UnsignedShort = 0x1,
@@ -1216,12 +1200,7 @@ public:
}
} index_array;
union {
BitField<0, 16, u32> first;
BitField<16, 16, u32> count;
} small_index;
INSERT_PADDING_WORDS_NOINIT(0x6);
INSERT_PADDING_WORDS_NOINIT(0x7);
INSERT_PADDING_WORDS_NOINIT(0x1F);
@@ -1265,11 +1244,7 @@ public:
BitField<11, 1, u32> depth_clamp_disabled;
} view_volume_clip_control;
INSERT_PADDING_WORDS_NOINIT(0xC);
PrimitiveTopologyOverride topology_override;
INSERT_PADDING_WORDS_NOINIT(0x12);
INSERT_PADDING_WORDS_NOINIT(0x1F);
u32 depth_bounds_enable;
@@ -1556,9 +1531,6 @@ private:
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
void DrawArrays();
/// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
void ProcessTopologyOverride();
// Handles a instance drawcall from MME
void StepInstance(MMEDrawMode expected_mode, u32 count);
@@ -1597,7 +1569,6 @@ private:
Upload::State upload_state;
bool execute_on{true};
bool use_topology_override{false};
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -1714,7 +1685,6 @@ ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(small_index, 0x5F9);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(vp_point_size, 0x644);
@@ -1724,7 +1694,6 @@ ASSERT_REG_POSITION(cull_face, 0x648);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
ASSERT_REG_POSITION(topology_override, 0x65C);
ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);

View File

@@ -53,6 +53,7 @@ void MaxwellDMA::Launch() {
// TODO(Subv): Perform more research and implement all features of this engine.
const LaunchDMA& launch = regs.launch_dma;
ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
ASSERT(regs.dst_params.origin.x == 0);
@@ -78,7 +79,6 @@ void MaxwellDMA::Launch() {
CopyPitchToBlockLinear();
}
}
ReleaseSemaphore();
}
void MaxwellDMA::CopyPitchToPitch() {
@@ -244,22 +244,4 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::ReleaseSemaphore() {
const auto type = regs.launch_dma.semaphore_type;
const GPUVAddr address = regs.semaphore.address;
switch (type) {
case LaunchDMA::SemaphoreType::NONE:
break;
case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE:
memory_manager.Write<u32>(address, regs.semaphore.payload);
break;
case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE:
memory_manager.Write<u64>(address, static_cast<u64>(regs.semaphore.payload));
memory_manager.Write<u64>(address + 8, system.GPU().GetTicks());
break;
default:
UNREACHABLE_MSG("Unknown semaphore type: {}", static_cast<u32>(type.Value()));
}
}
} // namespace Tegra::Engines

View File

@@ -224,8 +224,6 @@ private:
void FastCopyBlockLinearToPitch();
void ReleaseSemaphore();
Core::System& system;
MemoryManager& memory_manager;

View File

@@ -14,7 +14,6 @@ set(SHADER_FILES
convert_d24s8_to_abgr8.frag
convert_depth_to_float.frag
convert_float_to_depth.frag
convert_s8d24_to_abgr8.frag
full_screen_triangle.vert
fxaa.frag
fxaa.vert

View File

@@ -1,23 +0,0 @@
// Copyright 2022 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#version 450
layout(binding = 0) uniform sampler2D depth_tex;
layout(binding = 1) uniform isampler2D stencil_tex;
layout(location = 0) out vec4 color;
// Repack an S8D24 depth-stencil texel into a normalized ABGR8 color output.
// Fix: removed two dead locals (`depth`, `stencil`) that were computed from the
// same textures but never read afterwards.
void main() {
    ivec2 coord = ivec2(gl_FragCoord.xy);
    // Expand the normalized depth sample back to a 32-bit integer range so the
    // byte shifts below can pick its individual bytes.
    highp uint depth_val =
        uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0));
    lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r;
    // Three depth bytes plus the stencil byte as the fourth component.
    highp uvec4 components =
        uvec4((uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu, stencil_val);
    // Normalize each byte back to [0, 1] for the color attachment.
    color.rgba = vec4(components) / (exp2(8.0) - 1.0);
}

View File

@@ -9,7 +9,6 @@
#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
#include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
@@ -371,7 +370,6 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)),
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
if (device.IsExtShaderStencilExportSupported()) {
@@ -476,13 +474,6 @@ void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer,
ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer,
                                          ImageView& src_image_view) {
    // Set up the color-target pipeline for this framebuffer's render pass,
    // then run the depth-stencil conversion draw into it.
    const auto renderpass = dst_framebuffer->RenderPass();
    ConvertPipelineColorTargetEx(convert_s8d24_to_abgr8_pipeline, renderpass,
                                 convert_s8d24_to_abgr8_frag);
    ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;

View File

@@ -56,8 +56,6 @@ public:
void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
@@ -101,7 +99,6 @@ private:
vk::ShaderModule convert_float_to_depth_frag;
vk::ShaderModule convert_abgr8_to_d24s8_frag;
vk::ShaderModule convert_d24s8_to_abgr8_frag;
vk::ShaderModule convert_s8d24_to_abgr8_frag;
vk::Sampler linear_sampler;
vk::Sampler nearest_sampler;
@@ -115,7 +112,6 @@ private:
vk::Pipeline convert_r16_to_d16_pipeline;
vk::Pipeline convert_abgr8_to_d24s8_pipeline;
vk::Pipeline convert_d24s8_to_abgr8_pipeline;
vk::Pipeline convert_s8d24_to_abgr8_pipeline;
};
} // namespace Vulkan

View File

@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <cstring>
#include <memory>
#include <optional>
@@ -293,7 +292,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
};
const std::array<u32, 2> push_constants{base_vertex, index_shift};
const std::array push_constants{base_vertex, index_shift};
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);

View File

@@ -1070,9 +1070,6 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
}
if (src_view.format == PixelFormat::D24_UNORM_S8_UINT) {
return blit_image_helper.ConvertS8D24ToABGR8(dst, src_view);
}
break;
case PixelFormat::R32_FLOAT:
if (src_view.format == PixelFormat::D32_FLOAT) {

View File

@@ -50,7 +50,6 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor
gpu->BindRenderer(std::move(renderer));
return gpu;
} catch (const std::runtime_error& exception) {
scope.Cancel();
LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
return nullptr;
}

View File

@@ -1155,8 +1155,6 @@ void Config::SaveCpuValues() {
WriteBasicSetting(Settings::values.cpuopt_misc_ir);
WriteBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
WriteBasicSetting(Settings::values.cpuopt_fastmem);
WriteBasicSetting(Settings::values.cpuopt_fastmem_exclusives);
WriteBasicSetting(Settings::values.cpuopt_recompile_exclusives);
}
qt_config->endGroup();

View File

@@ -249,9 +249,9 @@ GMainWindow::GMainWindow()
#ifdef ARCHITECTURE_x86_64
const auto& caps = Common::GetCPUCaps();
std::string cpu_string = caps.cpu_string;
if (caps.avx || caps.avx2 || caps.avx512f) {
if (caps.avx || caps.avx2 || caps.avx512) {
cpu_string += " | AVX";
if (caps.avx512f) {
if (caps.avx512) {
cpu_string += "512";
} else if (caps.avx2) {
cpu_string += '2';