Compare commits
67 Commits
| Author | SHA1 | Date |
|---|---|---|
| | e394e1ecc4 | |
| | 90a0506d56 | |
| | ad58d7eae7 | |
| | 11099dda2e | |
| | 64a68ccbb4 | |
| | 4790ba7839 | |
| | 1900abde13 | |
| | 60b5670577 | |
| | b00406c8e4 | |
| | 8dea7fa129 | |
| | 2241d8c971 | |
| | beaf7654bb | |
| | 0dec42431f | |
| | e4c63d432d | |
| | f300a1d54b | |
| | 3a1a3dd0db | |
| | 74e6e3623f | |
| | 8a244dd3d3 | |
| | fd1cef5616 | |
| | adcac857f8 | |
| | f6a049337e | |
| | 40050c1188 | |
| | 9bf7ad97f5 | |
| | 5723145165 | |
| | 84cc22b21b | |
| | c93dd45997 | |
| | a1c4bca908 | |
| | 432f4441b9 | |
| | 306b3491c4 | |
| | 8dbad556ec | |
| | a8a4f37628 | |
| | 81d1a1133d | |
| | cfd9f7d25b | |
| | a05d9405b9 | |
| | 74f80299b0 | |
| | f11eefed56 | |
| | 6b873b72ae | |
| | a3c81745b1 | |
| | d8486a9968 | |
| | 9b38c8ef08 | |
| | e7af84670d | |
| | b46ec4efea | |
| | 4f9f55ec21 | |
| | 3442365127 | |
| | 651358d0b6 | |
| | 187c9d7e33 | |
| | 0849be094e | |
| | 8bb39750a1 | |
| | 12e7d3b254 | |
| | 51dd3da11c | |
| | 87eb3cb083 | |
| | b084a9bf0a | |
| | 2f12caccf9 | |
| | b998aa5504 | |
| | fdde08bd01 | |
| | 59add00d4a | |
| | e791da9791 | |
| | 07add23251 | |
| | ee25e0a40b | |
| | 0cee5e1af8 | |
| | ffcaf5af90 | |
| | 2935c9d8de | |
| | 264bb5abf7 | |
| | 0137f2e6e1 | |
| | 6d8e498f76 | |
| | b8b1b58f36 | |
| | 7d133fd37e | |
externals/FidelityFX-FSR/ffx-fsr/ffx_fsr1.h (vendored, 36 changed lines)
@@ -747,12 +747,12 @@ AF1 sharpness){
  // Immediate constants for peak range.
  AF2 peakC=AF2(1.0,-1.0*4.0);
  // Limiters, these need to be high precision RCPs.
- AF1 hitMinR=mn4R*ARcpF1(AF1_(4.0)*mx4R);
- AF1 hitMinG=mn4G*ARcpF1(AF1_(4.0)*mx4G);
- AF1 hitMinB=mn4B*ARcpF1(AF1_(4.0)*mx4B);
- AF1 hitMaxR=(peakC.x-mx4R)*ARcpF1(AF1_(4.0)*mn4R+peakC.y);
- AF1 hitMaxG=(peakC.x-mx4G)*ARcpF1(AF1_(4.0)*mn4G+peakC.y);
- AF1 hitMaxB=(peakC.x-mx4B)*ARcpF1(AF1_(4.0)*mn4B+peakC.y);
+ AF1 hitMinR=min(mn4R,eR)*ARcpF1(AF1_(4.0)*mx4R);
+ AF1 hitMinG=min(mn4G,eG)*ARcpF1(AF1_(4.0)*mx4G);
+ AF1 hitMinB=min(mn4B,eB)*ARcpF1(AF1_(4.0)*mx4B);
+ AF1 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpF1(AF1_(4.0)*mn4R+peakC.y);
+ AF1 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpF1(AF1_(4.0)*mn4G+peakC.y);
+ AF1 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpF1(AF1_(4.0)*mn4B+peakC.y);
  AF1 lobeR=max(-hitMinR,hitMaxR);
  AF1 lobeG=max(-hitMinG,hitMaxG);
  AF1 lobeB=max(-hitMinB,hitMaxB);

@@ -845,12 +845,12 @@ AF1 sharpness){
  // Immediate constants for peak range.
  AH2 peakC=AH2(1.0,-1.0*4.0);
  // Limiters, these need to be high precision RCPs.
- AH1 hitMinR=mn4R*ARcpH1(AH1_(4.0)*mx4R);
- AH1 hitMinG=mn4G*ARcpH1(AH1_(4.0)*mx4G);
- AH1 hitMinB=mn4B*ARcpH1(AH1_(4.0)*mx4B);
- AH1 hitMaxR=(peakC.x-mx4R)*ARcpH1(AH1_(4.0)*mn4R+peakC.y);
- AH1 hitMaxG=(peakC.x-mx4G)*ARcpH1(AH1_(4.0)*mn4G+peakC.y);
- AH1 hitMaxB=(peakC.x-mx4B)*ARcpH1(AH1_(4.0)*mn4B+peakC.y);
+ AH1 hitMinR=min(mn4R,eR)*ARcpH1(AH1_(4.0)*mx4R);
+ AH1 hitMinG=min(mn4G,eG)*ARcpH1(AH1_(4.0)*mx4G);
+ AH1 hitMinB=min(mn4B,eB)*ARcpH1(AH1_(4.0)*mx4B);
+ AH1 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpH1(AH1_(4.0)*mn4R+peakC.y);
+ AH1 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpH1(AH1_(4.0)*mn4G+peakC.y);
+ AH1 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpH1(AH1_(4.0)*mn4B+peakC.y);
  AH1 lobeR=max(-hitMinR,hitMaxR);
  AH1 lobeG=max(-hitMinG,hitMaxG);
  AH1 lobeB=max(-hitMinB,hitMaxB);

@@ -963,12 +963,12 @@ AF1 sharpness){
  // Immediate constants for peak range.
  AH2 peakC=AH2(1.0,-1.0*4.0);
  // Limiters, these need to be high precision RCPs.
- AH2 hitMinR=mn4R*ARcpH2(AH2_(4.0)*mx4R);
- AH2 hitMinG=mn4G*ARcpH2(AH2_(4.0)*mx4G);
- AH2 hitMinB=mn4B*ARcpH2(AH2_(4.0)*mx4B);
- AH2 hitMaxR=(peakC.x-mx4R)*ARcpH2(AH2_(4.0)*mn4R+peakC.y);
- AH2 hitMaxG=(peakC.x-mx4G)*ARcpH2(AH2_(4.0)*mn4G+peakC.y);
- AH2 hitMaxB=(peakC.x-mx4B)*ARcpH2(AH2_(4.0)*mn4B+peakC.y);
+ AH2 hitMinR=min(mn4R,eR)*ARcpH2(AH2_(4.0)*mx4R);
+ AH2 hitMinG=min(mn4G,eG)*ARcpH2(AH2_(4.0)*mx4G);
+ AH2 hitMinB=min(mn4B,eB)*ARcpH2(AH2_(4.0)*mx4B);
+ AH2 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpH2(AH2_(4.0)*mn4R+peakC.y);
+ AH2 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpH2(AH2_(4.0)*mn4G+peakC.y);
+ AH2 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpH2(AH2_(4.0)*mn4B+peakC.y);
  AH2 lobeR=max(-hitMinR,hitMaxR);
  AH2 lobeG=max(-hitMinG,hitMaxG);
  AH2 lobeB=max(-hitMinB,hitMaxB);
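Note on the three RCAS hunks above: the AF1, AH1, and AH2 variants all make the same change, folding the center sample e{R,G,B} into the min/max limiters. A minimal scalar sketch of the idea, with illustrative names (not the FFX code itself):

```cpp
// Scalar sketch of the limiter change: mn4/mx4 are the min/max of the four
// cross neighbors and e is the center sample. Folding e into the range keeps
// the sharpening lobe bounded when the center lies outside [mn4, mx4], as it
// can on noisy input. The scalar form and test values are illustrative only.
#include <algorithm>
#include <cstdio>

float SharpenLobe(float mn4, float mx4, float e) {
    const float peak_x = 1.0f;
    const float peak_y = -1.0f * 4.0f;
    const float hit_min = std::min(mn4, e) / (4.0f * mx4);
    const float hit_max = (peak_x - std::max(mx4, e)) / (4.0f * mn4 + peak_y);
    return std::max(-hit_min, hit_max);
}

int main() {
    // Center brighter than every cross neighbor: the widened range shrinks
    // the negative lobe instead of over-sharpening.
    std::printf("%f\n", SharpenLobe(0.2f, 0.6f, 0.9f));
}
```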
@@ -554,6 +554,7 @@ struct Values {
     Setting<bool> use_docked_mode{true, "use_docked_mode"};
 
     BasicSetting<bool> enable_raw_input{false, "enable_raw_input"};
+    BasicSetting<bool> controller_navigation{true, "controller_navigation"};
 
     Setting<bool> vibration_enabled{true, "vibration_enabled"};
     Setting<bool> enable_accurate_vibrations{false, "enable_accurate_vibrations"};
@@ -72,7 +72,9 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
     if (caps.invariant_tsc) {
         rtsc_frequency = EstimateRDTSCFrequency();
     }
-    if (rtsc_frequency == 0) {
+
+    // Fallback to StandardWallClock if rtsc period is higher than a nano second
+    if (rtsc_frequency <= 1000000000) {
         return std::make_unique<StandardWallClock>(emulated_cpu_frequency,
                                                    emulated_clock_frequency);
     } else {
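The wall-clock hunk replaces the bare `== 0` failure check with a threshold: the native TSC-backed clock is only worth using when one tick is at most a nanosecond. A hedged restatement of that rule (the helper and constant are illustrative, not yuzu API):

```cpp
#include <cstdint>

constexpr std::uint64_t kNanosPerSecond = 1'000'000'000;

bool ShouldUseNativeClock(std::uint64_t rtsc_frequency) {
    // A frequency of 0 (estimation failed or no invariant TSC) also fails
    // this test, which is why the old `== 0` check folds into `<= 1e9`.
    return rtsc_frequency > kNanosPerSecond;
}
```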
@@ -37,12 +37,12 @@ constexpr Xbyak::Reg IndexToReg(size_t reg_index) {
     }
 }
 
-inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
-    std::bitset<32> bits;
+constexpr std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
+    size_t bits = 0;
     for (const Xbyak::Reg& reg : regs) {
-        bits[RegToIndex(reg)] = true;
+        bits |= size_t{1} << RegToIndex(reg);
     }
-    return bits;
+    return {bits};
 }
 
 constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);

@@ -57,7 +57,7 @@ constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
 constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
 constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
 
-const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
+constexpr inline std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
     // GPRs
     Xbyak::util::rcx,
     Xbyak::util::rdx,

@@ -74,7 +74,7 @@ const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
     Xbyak::util::xmm5,
 });
 
-const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
+constexpr inline std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
     // GPRs
     Xbyak::util::rbx,
     Xbyak::util::rsi,

@@ -108,7 +108,7 @@ constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
 constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
 constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
 
-const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
+constexpr inline std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
     // GPRs
     Xbyak::util::rcx,
     Xbyak::util::rdx,

@@ -137,7 +137,7 @@ const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
     Xbyak::util::xmm15,
 });
 
-const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
+constexpr inline std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
     // GPRs
     Xbyak::util::rbx,
     Xbyak::util::rbp,
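The BuildRegSet rewrite above is what lets the ABI register masks become `constexpr inline`. A short sketch of why the function changed shape, under the assumption of a pre-C++23 toolchain:

```cpp
// std::bitset's operator[] is not usable in constant expressions before
// C++23, but its constructor from an unsigned integer has been constexpr
// since C++11, so the mask is accumulated in a plain integer and converted
// once on return. Names here are illustrative.
#include <bitset>
#include <cstddef>
#include <initializer_list>

constexpr std::bitset<32> MakeMask(std::initializer_list<int> indices) {
    std::size_t bits = 0;
    for (const int index : indices) {
        bits |= std::size_t{1} << index;
    }
    return {bits};  // constexpr construction from the accumulated word
}

constexpr std::bitset<32> kLowTwo = MakeMask({0, 1});  // evaluated at compile time
```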
@@ -31,8 +31,6 @@ public:
     }
 
     constexpr void SetAffinity(s32 core, bool set) {
-        ASSERT(0 <= core && core < static_cast<s32>(Core::Hardware::NUM_CPU_CORES));
-
         if (set) {
             this->mask |= GetCoreBit(core);
         } else {
@@ -276,22 +276,23 @@ ResultCode KPageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_
 
 ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemoryState state,
                                       KMemoryPermission perm) {
-    std::lock_guard lock{page_table_lock};
-
     const u64 size{num_pages * PageSize};
 
-    if (!CanContain(addr, size, state)) {
-        return ResultInvalidCurrentMemory;
-    }
+    // Validate the mapping request.
+    R_UNLESS(this->CanContain(addr, size, state), ResultInvalidCurrentMemory);
+
+    // Lock the table.
+    std::lock_guard lock{page_table_lock};
 
-    if (IsRegionMapped(addr, size)) {
-        return ResultInvalidCurrentMemory;
-    }
+    // Verify that the destination memory is unmapped.
+    R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, KMemoryState::Free,
+                                 KMemoryPermission::None, KMemoryPermission::None,
+                                 KMemoryAttribute::None, KMemoryAttribute::None));
 
     KPageLinkedList page_linked_list;
-    CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool,
-                                                          allocation_option));
-    CASCADE_CODE(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup));
+    R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool,
+                                                   allocation_option));
+    R_TRY(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup));
 
     block_manager->Update(addr, num_pages, state, perm);
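Note on this hunk and the KPageTable hunks that follow: CASCADE_CODE and hand-rolled `if (...) return ...;` checks are replaced with the R_UNLESS/R_TRY result macros used in Horizon-style kernel code, and validation now happens before the table lock is taken. A hedged sketch of what such macros conventionally expand to; yuzu's actual definitions may differ in detail:

```cpp
// Illustrative only: early-out result macros in the style the hunks adopt.
#define R_UNLESS(expr, res)                                                    \
    do {                                                                       \
        if (!(expr)) {                                                         \
            return (res);                                                      \
        }                                                                      \
    } while (0)

#define R_TRY(res_expr)                                                        \
    do {                                                                       \
        if (const auto rc_temp = (res_expr); rc_temp.IsError()) {              \
            return rc_temp;                                                    \
        }                                                                      \
    } while (0)
```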
@@ -395,39 +396,12 @@ ResultCode KPageTable::UnmapProcessMemory(VAddr dst_addr, std::size_t size,
 
     return ResultSuccess;
 }
-
-void KPageTable::MapPhysicalMemory(KPageLinkedList& page_linked_list, VAddr start, VAddr end) {
-    auto node{page_linked_list.Nodes().begin()};
-    PAddr map_addr{node->GetAddress()};
-    std::size_t src_num_pages{node->GetNumPages()};
-
-    block_manager->IterateForRange(start, end, [&](const KMemoryInfo& info) {
-        if (info.state != KMemoryState::Free) {
-            return;
-        }
-
-        std::size_t dst_num_pages{GetSizeInRange(info, start, end) / PageSize};
-        VAddr dst_addr{GetAddressInRange(info, start)};
-
-        while (dst_num_pages) {
-            if (!src_num_pages) {
-                node = std::next(node);
-                map_addr = node->GetAddress();
-                src_num_pages = node->GetNumPages();
-            }
-
-            const std::size_t num_pages{std::min(src_num_pages, dst_num_pages)};
-            Operate(dst_addr, num_pages, KMemoryPermission::UserReadWrite, OperationType::Map,
-                    map_addr);
-
-            dst_addr += num_pages * PageSize;
-            map_addr += num_pages * PageSize;
-            src_num_pages -= num_pages;
-            dst_num_pages -= num_pages;
-        }
-    });
-}
 
 ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
     // Lock the physical memory lock.
     std::lock_guard phys_lk(map_physical_memory_lock);
 
     // Lock the table.
     std::lock_guard lock{page_table_lock};
 
     std::size_t mapped_size{};

@@ -463,7 +437,35 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
     // We succeeded, so commit the memory reservation.
     memory_reservation.Commit();
 
-    MapPhysicalMemory(page_linked_list, addr, end_addr);
+    // Map the memory.
+    auto node{page_linked_list.Nodes().begin()};
+    PAddr map_addr{node->GetAddress()};
+    std::size_t src_num_pages{node->GetNumPages()};
+    block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) {
+        if (info.state != KMemoryState::Free) {
+            return;
+        }
+
+        std::size_t dst_num_pages{GetSizeInRange(info, addr, end_addr) / PageSize};
+        VAddr dst_addr{GetAddressInRange(info, addr)};
+
+        while (dst_num_pages) {
+            if (!src_num_pages) {
+                node = std::next(node);
+                map_addr = node->GetAddress();
+                src_num_pages = node->GetNumPages();
+            }
+
+            const std::size_t num_pages{std::min(src_num_pages, dst_num_pages)};
+            Operate(dst_addr, num_pages, KMemoryPermission::UserReadWrite, OperationType::Map,
+                    map_addr);
+
+            dst_addr += num_pages * PageSize;
+            map_addr += num_pages * PageSize;
+            src_num_pages -= num_pages;
+            dst_num_pages -= num_pages;
+        }
+    });
 
     mapped_physical_memory_size += remaining_size;
@@ -503,23 +505,8 @@ ResultCode KPageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) {
         return ResultSuccess;
     }
 
-    CASCADE_CODE(UnmapMemory(addr, size));
-
-    auto process{system.Kernel().CurrentProcess()};
-    process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size);
-    mapped_physical_memory_size -= mapped_size;
-
-    return ResultSuccess;
-}
-
-ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) {
-    std::lock_guard lock{page_table_lock};
-
     const VAddr end_addr{addr + size};
-    ResultCode result{ResultSuccess};
-    KPageLinkedList page_linked_list;
 
     // Unmap each region within the range
+    KPageLinkedList page_linked_list;
     block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) {
         if (info.state == KMemoryState::Normal) {
             const std::size_t block_size{GetSizeInRange(info, addr, end_addr)};

@@ -535,7 +522,6 @@ ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) {
             }
         }
     });
-
     if (result.IsError()) {
         return result;
     }

@@ -546,10 +532,14 @@ ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) {
 
     block_manager->Update(addr, num_pages, KMemoryState::Free);
 
+    auto process{system.Kernel().CurrentProcess()};
+    process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size);
+    mapped_physical_memory_size -= mapped_size;
+
     return ResultSuccess;
 }
 
-ResultCode KPageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) {
+ResultCode KPageTable::MapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
     std::lock_guard lock{page_table_lock};
 
     KMemoryState src_state{};

@@ -588,7 +578,7 @@ ResultCode KPageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) {
     return ResultSuccess;
 }
 
-ResultCode KPageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) {
+ResultCode KPageTable::UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
     std::lock_guard lock{page_table_lock};
 
     KMemoryState src_state{};
@@ -652,24 +642,26 @@ ResultCode KPageTable::MapPages(VAddr addr, const KPageLinkedList& page_linked_l
     return ResultSuccess;
 }
 
-ResultCode KPageTable::MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state,
-                                KMemoryPermission perm) {
-    std::lock_guard lock{page_table_lock};
-
+ResultCode KPageTable::MapPages(VAddr address, KPageLinkedList& page_linked_list,
+                                KMemoryState state, KMemoryPermission perm) {
+    // Check that the map is in range.
     const std::size_t num_pages{page_linked_list.GetNumPages()};
     const std::size_t size{num_pages * PageSize};
+    R_UNLESS(this->CanContain(address, size, state), ResultInvalidCurrentMemory);
 
-    if (!CanContain(addr, size, state)) {
-        return ResultInvalidCurrentMemory;
-    }
+    // Lock the table.
+    std::lock_guard lock{page_table_lock};
 
-    if (IsRegionMapped(addr, num_pages * PageSize)) {
-        return ResultInvalidCurrentMemory;
-    }
+    // Check the memory state.
+    R_TRY(this->CheckMemoryState(address, size, KMemoryState::All, KMemoryState::Free,
+                                 KMemoryPermission::None, KMemoryPermission::None,
+                                 KMemoryAttribute::None, KMemoryAttribute::None));
 
-    CASCADE_CODE(MapPages(addr, page_linked_list, perm));
+    // Map the pages.
+    R_TRY(MapPages(address, page_linked_list, perm));
 
-    block_manager->Update(addr, num_pages, state, perm);
+    // Update the blocks.
+    block_manager->Update(address, num_pages, state, perm);
 
     return ResultSuccess;
 }

@@ -693,21 +685,23 @@ ResultCode KPageTable::UnmapPages(VAddr addr, const KPageLinkedList& page_linked
 
 ResultCode KPageTable::UnmapPages(VAddr addr, KPageLinkedList& page_linked_list,
                                   KMemoryState state) {
-    std::lock_guard lock{page_table_lock};
-
+    // Check that the unmap is in range.
     const std::size_t num_pages{page_linked_list.GetNumPages()};
     const std::size_t size{num_pages * PageSize};
+    R_UNLESS(this->Contains(addr, size), ResultInvalidCurrentMemory);
 
-    if (!CanContain(addr, size, state)) {
-        return ResultInvalidCurrentMemory;
-    }
+    // Lock the table.
+    std::lock_guard lock{page_table_lock};
 
-    if (IsRegionMapped(addr, num_pages * PageSize)) {
-        return ResultInvalidCurrentMemory;
-    }
+    // Check the memory state.
+    R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, state, KMemoryPermission::None,
+                                 KMemoryPermission::None, KMemoryAttribute::All,
+                                 KMemoryAttribute::None));
 
-    CASCADE_CODE(UnmapPages(addr, page_linked_list));
+    // Perform the unmap.
+    R_TRY(UnmapPages(addr, page_linked_list));
 
+    // Update the blocks.
     block_manager->Update(addr, num_pages, state, KMemoryPermission::None);
 
     return ResultSuccess;
@@ -765,7 +759,6 @@ ResultCode KPageTable::SetProcessMemoryPermission(VAddr addr, std::size_t size,
 
     // Ensure cache coherency, if we're setting pages as executable.
     if (is_x) {
-        // Memory execution state is changing, invalidate CPU cache range
         system.InvalidateCpuInstructionCacheRange(addr, size);
     }
 

@@ -793,12 +786,12 @@ ResultCode KPageTable::ReserveTransferMemory(VAddr addr, std::size_t size, KMemo
     KMemoryState state{};
     KMemoryAttribute attribute{};
 
-    CASCADE_CODE(CheckMemoryState(
-        &state, nullptr, &attribute, nullptr, addr, size,
-        KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
-        KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, KMemoryPermission::All,
-        KMemoryPermission::UserReadWrite, KMemoryAttribute::Mask, KMemoryAttribute::None,
-        KMemoryAttribute::IpcAndDeviceMapped));
+    R_TRY(CheckMemoryState(&state, nullptr, &attribute, nullptr, addr, size,
+                           KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
+                           KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
+                           KMemoryPermission::All, KMemoryPermission::UserReadWrite,
+                           KMemoryAttribute::Mask, KMemoryAttribute::None,
+                           KMemoryAttribute::IpcAndDeviceMapped));
 
     block_manager->Update(addr, size / PageSize, state, perm, attribute | KMemoryAttribute::Locked);
 

@@ -810,12 +803,11 @@ ResultCode KPageTable::ResetTransferMemory(VAddr addr, std::size_t size) {
 
     KMemoryState state{};
 
-    CASCADE_CODE(
-        CheckMemoryState(&state, nullptr, nullptr, nullptr, addr, size,
-                         KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
-                         KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
-                         KMemoryPermission::None, KMemoryPermission::None, KMemoryAttribute::Mask,
-                         KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
+    R_TRY(CheckMemoryState(&state, nullptr, nullptr, nullptr, addr, size,
+                           KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
+                           KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
+                           KMemoryPermission::None, KMemoryPermission::None, KMemoryAttribute::Mask,
+                           KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
 
     block_manager->Update(addr, size / PageSize, state, KMemoryPermission::UserReadWrite);
     return ResultSuccess;

@@ -871,8 +863,9 @@ ResultCode KPageTable::SetMemoryAttribute(VAddr addr, std::size_t size, u32 mask
                               AttributeTestMask, KMemoryAttribute::None, ~AttributeTestMask));
 
     // Determine the new attribute.
-    const auto new_attr = ((old_attr & static_cast<KMemoryAttribute>(~mask)) |
-                           static_cast<KMemoryAttribute>(attr & mask));
+    const KMemoryAttribute new_attr =
+        static_cast<KMemoryAttribute>(((old_attr & static_cast<KMemoryAttribute>(~mask)) |
+                                       static_cast<KMemoryAttribute>(attr & mask)));
 
     // Perform operation.
     this->Operate(addr, num_pages, old_perm, OperationType::ChangePermissionsAndRefresh);

@@ -896,6 +889,9 @@ ResultCode KPageTable::SetMaxHeapSize(std::size_t size) {
 }
 
 ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) {
+    // Lock the physical memory lock.
+    std::lock_guard phys_lk(map_physical_memory_lock);
+
     // Try to perform a reduction in heap, instead of an extension.
     VAddr cur_address{};
     std::size_t allocation_size{};

@@ -1025,12 +1021,12 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages,
     }
 
     if (is_map_only) {
-        CASCADE_CODE(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr));
+        R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr));
     } else {
         KPageLinkedList page_group;
-        CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages,
-                                                              memory_pool, allocation_option));
-        CASCADE_CODE(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup));
+        R_TRY(system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, memory_pool,
+                                                       allocation_option));
+        R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup));
     }
 
     block_manager->Update(addr, needed_num_pages, state, perm);

@@ -1186,7 +1182,7 @@ VAddr KPageTable::AllocateVirtualMemory(VAddr start, std::size_t region_num_page
 
 ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, const KPageLinkedList& page_group,
                                OperationType operation) {
-    std::lock_guard lock{page_table_lock};
+    ASSERT(this->IsLockedByCurrentThread());
 
     ASSERT(Common::IsAligned(addr, PageSize));
     ASSERT(num_pages > 0);

@@ -1211,7 +1207,7 @@ ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, const KPageLin
 
 ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm,
                                OperationType operation, PAddr map_addr) {
-    std::lock_guard lock{page_table_lock};
+    ASSERT(this->IsLockedByCurrentThread());
 
     ASSERT(num_pages > 0);
     ASSERT(Common::IsAligned(addr, PageSize));
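The two Operate() overloads above stop re-acquiring the recursive page_table_lock and instead assert that the caller already holds it, which the callers now guarantee. A hedged sketch of the ownership-checking lock idea behind IsLockedByCurrentThread(); yuzu's real lock type differs in implementation:

```cpp
// Illustrative only: a mutex that records its owner so precondition asserts
// can replace redundant recursive locking on internal code paths.
#include <atomic>
#include <cassert>
#include <mutex>
#include <thread>

class CheckedLock {
public:
    void lock() {
        mutex.lock();
        owner.store(std::this_thread::get_id());
    }

    void unlock() {
        owner.store({});
        mutex.unlock();
    }

    bool IsLockedByCurrentThread() const {
        return owner.load() == std::this_thread::get_id();
    }

private:
    std::mutex mutex;
    std::atomic<std::thread::id> owner{};
};

void Operate(CheckedLock& page_table_lock) {
    // Instead of re-acquiring a recursive mutex, assert the precondition.
    assert(page_table_lock.IsLockedByCurrentThread());
    // ... perform the page table operation ...
}
```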
@@ -37,9 +37,8 @@ public:
                                 VAddr src_addr);
     ResultCode MapPhysicalMemory(VAddr addr, std::size_t size);
     ResultCode UnmapPhysicalMemory(VAddr addr, std::size_t size);
-    ResultCode UnmapMemory(VAddr addr, std::size_t size);
-    ResultCode Map(VAddr dst_addr, VAddr src_addr, std::size_t size);
-    ResultCode Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size);
+    ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
+    ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
     ResultCode MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state,
                         KMemoryPermission perm);
     ResultCode UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state);

@@ -88,7 +87,6 @@ private:
     ResultCode MapPages(VAddr addr, const KPageLinkedList& page_linked_list,
                         KMemoryPermission perm);
     ResultCode UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list);
-    void MapPhysicalMemory(KPageLinkedList& page_linked_list, VAddr start, VAddr end);
     bool IsRegionMapped(VAddr address, u64 size);
     bool IsRegionContiguous(VAddr addr, u64 size) const;
     void AddRegionToPages(VAddr start, std::size_t num_pages, KPageLinkedList& page_linked_list);

@@ -148,6 +146,7 @@ private:
     }
 
     std::recursive_mutex page_table_lock;
+    std::mutex map_physical_memory_lock;
     std::unique_ptr<KMemoryBlockManager> block_manager;
 
 public:

@@ -249,7 +248,9 @@ public:
         return !IsOutsideASLRRegion(address, size);
     }
     constexpr PAddr GetPhysicalAddr(VAddr addr) {
-        return page_table_impl.backing_addr[addr >> PageBits] + addr;
+        const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits];
+        ASSERT(backing_addr);
+        return backing_addr + addr;
     }
     constexpr bool Contains(VAddr addr) const {
         return address_space_start <= addr && addr <= address_space_end - 1;
@@ -258,7 +258,7 @@ private:
 
 private:
     constexpr void ClearAffinityBit(u64& affinity, s32 core) {
-        affinity &= ~(u64(1) << core);
+        affinity &= ~(UINT64_C(1) << core);
    }
 
     constexpr s32 GetNextCore(u64& affinity) {
@@ -146,6 +146,13 @@ ResultCode KProcess::Initialize(KProcess* process, Core::System& system, std::st
     // Open a reference to the resource limit.
     process->resource_limit->Open();
 
+    // Clear remaining fields.
+    process->num_running_threads = 0;
+    process->is_signaled = false;
+    process->exception_thread = nullptr;
+    process->is_suspended = false;
+    process->schedule_count = 0;
+
     return ResultSuccess;
 }
 

@@ -157,20 +164,17 @@ KResourceLimit* KProcess::GetResourceLimit() const {
     return resource_limit;
 }
 
-void KProcess::IncrementThreadCount() {
-    ASSERT(num_threads >= 0);
-    num_created_threads++;
-
-    if (const auto count = ++num_threads; count > peak_num_threads) {
-        peak_num_threads = count;
-    }
+void KProcess::IncrementRunningThreadCount() {
+    ASSERT(num_running_threads.load() >= 0);
+    ++num_running_threads;
 }
 
-void KProcess::DecrementThreadCount() {
-    ASSERT(num_threads > 0);
+void KProcess::DecrementRunningThreadCount() {
+    ASSERT(num_running_threads.load() > 0);
 
-    if (const auto count = --num_threads; count == 0) {
-        LOG_WARNING(Kernel, "Process termination is not fully implemented.");
+    if (const auto prev = num_running_threads--; prev == 1) {
+        // TODO(bunnei): Process termination to be implemented when multiprocess is supported.
+        UNIMPLEMENTED_MSG("KProcess termination is not implemennted!");
     }
 }
@@ -235,8 +235,8 @@ public:
         ++schedule_count;
     }
 
-    void IncrementThreadCount();
-    void DecrementThreadCount();
+    void IncrementRunningThreadCount();
+    void DecrementRunningThreadCount();
 
     void SetRunningThread(s32 core, KThread* thread, u64 idle_count) {
         running_threads[core] = thread;

@@ -473,9 +473,7 @@ private:
     bool is_suspended{};
     bool is_initialized{};
 
-    std::atomic<s32> num_created_threads{};
-    std::atomic<u16> num_threads{};
-    u16 peak_num_threads{};
+    std::atomic<u16> num_running_threads{};
 
     std::array<KThread*, Core::Hardware::NUM_CPU_CORES> running_threads{};
     std::array<u64, Core::Hardware::NUM_CPU_CORES> running_thread_idle_counts{};
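The KProcess hunks above collapse the created/total/peak thread counters into a single atomic count of currently running threads, with the (still unimplemented) termination path firing when the count drops to zero. A hedged restatement of those semantics, not the KProcess API itself:

```cpp
#include <atomic>
#include <cassert>

class RunningThreadCount {
public:
    void Increment() {
        ++count;
    }

    // Returns true when the caller was the last running thread, i.e. the
    // point where process termination would begin.
    bool DecrementAndCheckLast() {
        const auto prev = count.fetch_sub(1);
        assert(prev > 0);
        return prev == 1;
    }

private:
    std::atomic<int> count{0};
};
```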
@@ -710,23 +710,19 @@ void KScheduler::Unload(KThread* thread) {
 }
 
 void KScheduler::Reload(KThread* thread) {
-    LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread ? thread->GetName() : "nullptr");
+    LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread->GetName());
 
-    if (thread) {
-        ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable.");
-
-        Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
-        cpu_core.LoadContext(thread->GetContext32());
-        cpu_core.LoadContext(thread->GetContext64());
-        cpu_core.SetTlsAddress(thread->GetTLSAddress());
-        cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
-        cpu_core.ClearExclusiveState();
-    }
+    Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
+    cpu_core.LoadContext(thread->GetContext32());
+    cpu_core.LoadContext(thread->GetContext64());
+    cpu_core.SetTlsAddress(thread->GetTLSAddress());
+    cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
+    cpu_core.ClearExclusiveState();
 }
 
 void KScheduler::SwitchContextStep2() {
     // Load context of new thread
-    Reload(current_thread.load());
+    Reload(GetCurrentThread());
 
     RescheduleCurrentCore();
 }

@@ -735,13 +731,17 @@ void KScheduler::ScheduleImpl() {
     KThread* previous_thread = GetCurrentThread();
     KThread* next_thread = state.highest_priority_thread;
 
-    state.needs_scheduling = false;
+    state.needs_scheduling.store(false);
 
     // We never want to schedule a null thread, so use the idle thread if we don't have a next.
     if (next_thread == nullptr) {
         next_thread = idle_thread;
     }
 
+    if (next_thread->GetCurrentCore() != core_id) {
+        next_thread->SetCurrentCore(core_id);
+    }
+
     // We never want to schedule a dummy thread, as these are only used by host threads for locking.
     if (next_thread->GetThreadType() == ThreadType::Dummy) {
         ASSERT_MSG(false, "Dummy threads should never be scheduled!");

@@ -755,14 +755,8 @@ void KScheduler::ScheduleImpl() {
         return;
     }
 
-    if (next_thread->GetCurrentCore() != core_id) {
-        next_thread->SetCurrentCore(core_id);
-    }
-
-    current_thread.store(next_thread);
-
     // Update the CPU time tracking variables.
     KProcess* const previous_process = system.Kernel().CurrentProcess();
 
     UpdateLastContextSwitchTime(previous_thread, previous_process);
 
     // Save context for previous thread

@@ -770,6 +764,10 @@ void KScheduler::ScheduleImpl() {
 
     std::shared_ptr<Common::Fiber>* old_context;
     old_context = &previous_thread->GetHostContext();
 
+    // Set the new thread.
+    current_thread.store(next_thread);
+
     guard.Unlock();
 
     Common::Fiber::YieldTo(*old_context, *switch_fiber);

@@ -797,8 +795,8 @@ void KScheduler::SwitchToCurrent() {
         do {
             auto next_thread = current_thread.load();
             if (next_thread != nullptr) {
-                next_thread->context_guard.Lock();
-                if (next_thread->GetRawState() != ThreadState::Runnable) {
+                const auto locked = next_thread->context_guard.TryLock();
+                if (state.needs_scheduling.load()) {
                     next_thread->context_guard.Unlock();
                     break;
                 }

@@ -806,6 +804,9 @@ void KScheduler::SwitchToCurrent() {
                     next_thread->context_guard.Unlock();
                     break;
                 }
+                if (!locked) {
+                    continue;
+                }
             }
             auto thread = next_thread ? next_thread : idle_thread;
             Common::Fiber::YieldTo(switch_fiber, *thread->GetHostContext());
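In SwitchToCurrent, the blocking Lock() on the next thread's context guard becomes a TryLock() plus a re-check of needs_scheduling, retrying selection on contention. A hedged sketch of that retry shape with illustrative names; yuzu's context guard is a custom spinlock, not std::mutex:

```cpp
#include <atomic>
#include <mutex>

struct Thread {
    std::mutex context_guard;
};

// Returns the thread to switch to, or nullptr when a reschedule is pending.
Thread* SelectForSwitch(Thread* next, std::atomic<bool>& needs_scheduling) {
    while (true) {
        if (next != nullptr) {
            const bool locked = next->context_guard.try_lock();
            if (needs_scheduling.load()) {
                if (locked) {
                    next->context_guard.unlock();
                }
                return nullptr;  // pick again under the scheduler lock
            }
            if (!locked) {
                continue;  // guard contended: retry the whole selection
            }
        }
        return next;  // caller substitutes the idle thread when next is null
    }
}
```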
@@ -215,7 +215,6 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s
 
         parent = owner;
         parent->Open();
-        parent->IncrementThreadCount();
     }
 
     // Initialize thread context.

@@ -327,11 +326,6 @@ void KThread::Finalize() {
         }
     }
 
-    // Decrement the parent process's thread count.
-    if (parent != nullptr) {
-        parent->DecrementThreadCount();
-    }
-
     // Perform inherited finalization.
     KSynchronizationObject::Finalize();
 }

@@ -1011,7 +1005,7 @@ ResultCode KThread::Run() {
             if (IsUserThread() && IsSuspended()) {
                 this->UpdateState();
             }
-            owner->IncrementThreadCount();
+            owner->IncrementRunningThreadCount();
         }
 
         // Set our state and finish.

@@ -1026,10 +1020,11 @@ ResultCode KThread::Run() {
 void KThread::Exit() {
     ASSERT(this == GetCurrentThreadPointer(kernel));
 
-    // Release the thread resource hint from parent.
+    // Release the thread resource hint, running thread count from parent.
     if (parent != nullptr) {
         parent->GetResourceLimit()->Release(Kernel::LimitableResource::Threads, 0, 1);
         resource_limit_release_hint = true;
+        parent->DecrementRunningThreadCount();
     }
 
     // Perform termination.
@@ -230,7 +230,7 @@ static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr
         return result;
     }
 
-    return page_table.Map(dst_addr, src_addr, size);
+    return page_table.MapMemory(dst_addr, src_addr, size);
 }
 
 static ResultCode MapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) {

@@ -249,7 +249,7 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad
         return result;
     }
 
-    return page_table.Unmap(dst_addr, src_addr, size);
+    return page_table.UnmapMemory(dst_addr, src_addr, size);
 }
 
 static ResultCode UnmapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) {
@@ -109,8 +109,9 @@ public:
 
     bool HasHDRumble() const {
         if (sdl_controller) {
-            return (SDL_GameControllerGetType(sdl_controller.get()) ==
-                    SDL_CONTROLLER_TYPE_NINTENDO_SWITCH_PRO);
+            const auto type = SDL_GameControllerGetType(sdl_controller.get());
+            return (type == SDL_CONTROLLER_TYPE_NINTENDO_SWITCH_PRO) ||
+                   (type == SDL_CONTROLLER_TYPE_PS5);
         }
         return false;
     }
@@ -339,7 +339,7 @@ void UDPClient::StartCommunication(std::size_t client, const std::string& host,
     }
 }
 
-const PadIdentifier UDPClient::GetPadIdentifier(std::size_t pad_index) const {
+PadIdentifier UDPClient::GetPadIdentifier(std::size_t pad_index) const {
     const std::size_t client = pad_index / PADS_PER_CLIENT;
     return {
         .guid = clients[client].uuid,

@@ -348,9 +348,9 @@ const PadIdentifier UDPClient::GetPadIdentifier(std::size_t pad_index) const {
     };
 }
 
-const Common::UUID UDPClient::GetHostUUID(const std::string host) const {
-    const auto ip = boost::asio::ip::address_v4::from_string(host);
-    const auto hex_host = fmt::format("{:06x}", ip.to_ulong());
+Common::UUID UDPClient::GetHostUUID(const std::string& host) const {
+    const auto ip = boost::asio::ip::make_address_v4(host);
+    const auto hex_host = fmt::format("{:06x}", ip.to_uint());
     return Common::UUID{hex_host};
 }
 

@@ -145,8 +145,8 @@ private:
     void OnPortInfo(Response::PortInfo);
     void OnPadData(Response::PadData, std::size_t client);
     void StartCommunication(std::size_t client, const std::string& host, u16 port);
-    const PadIdentifier GetPadIdentifier(std::size_t pad_index) const;
-    const Common::UUID GetHostUUID(const std::string host) const;
+    PadIdentifier GetPadIdentifier(std::size_t pad_index) const;
+    Common::UUID GetHostUUID(const std::string& host) const;
 
     Common::Input::ButtonNames GetUIButtonName(const Common::ParamPackage& params) const;
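GetHostUUID migrates off the deprecated Boost.Asio spellings: address_v4::from_string() and to_ulong() become make_address_v4() and to_uint(). A minimal standalone example of the new calls (requires Boost.Asio 1.66 or newer, where both were introduced):

```cpp
#include <boost/asio/ip/address_v4.hpp>
#include <cstdio>

int main() {
    const auto ip = boost::asio::ip::make_address_v4("127.0.0.1");
    // Prints 7f000001: the address as a 32-bit unsigned integer.
    std::printf("%08x\n", static_cast<unsigned>(ip.to_uint()));
}
```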
@@ -16,7 +16,7 @@
 
 // Pad Identifier of data source
 struct PadIdentifier {
-    Common::UUID guid{};
+    Common::UUID guid{Common::INVALID_UUID};
     std::size_t port{};
     std::size_t pad{};
 

@@ -89,7 +89,7 @@ struct UpdateCallback {
 
 // Triggered if data changed on the controller and the engine is on configuring mode
 struct MappingCallback {
-    std::function<void(MappingData)> on_data;
+    std::function<void(const MappingData&)> on_data;
 };
 
 // Input Identifier of data source
@@ -2,14 +2,13 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included
 
-#include "common/common_types.h"
 #include "common/settings.h"
 #include "input_common/input_engine.h"
 #include "input_common/input_mapping.h"
 
 namespace InputCommon {
 
-MappingFactory::MappingFactory() {}
+MappingFactory::MappingFactory() = default;
 
 void MappingFactory::BeginMapping(Polling::InputType type) {
     is_enabled = true;

@@ -19,7 +18,7 @@ void MappingFactory::BeginMapping(Polling::InputType type) {
     second_axis = -1;
 }
 
-[[nodiscard]] const Common::ParamPackage MappingFactory::GetNextInput() {
+Common::ParamPackage MappingFactory::GetNextInput() {
     Common::ParamPackage input;
     input_queue.Pop(input);
     return input;

@@ -57,7 +56,7 @@ void MappingFactory::StopMapping() {
 void MappingFactory::RegisterButton(const MappingData& data) {
     Common::ParamPackage new_input;
     new_input.Set("engine", data.engine);
-    if (data.pad.guid != Common::UUID{}) {
+    if (data.pad.guid.IsValid()) {
         new_input.Set("guid", data.pad.guid.Format());
     }
     new_input.Set("port", static_cast<int>(data.pad.port));

@@ -93,7 +92,7 @@ void MappingFactory::RegisterButton(const MappingData& data) {
 void MappingFactory::RegisterStick(const MappingData& data) {
     Common::ParamPackage new_input;
     new_input.Set("engine", data.engine);
-    if (data.pad.guid != Common::UUID{}) {
+    if (data.pad.guid.IsValid()) {
         new_input.Set("guid", data.pad.guid.Format());
     }
     new_input.Set("port", static_cast<int>(data.pad.port));

@@ -138,7 +137,7 @@ void MappingFactory::RegisterStick(const MappingData& data) {
 void MappingFactory::RegisterMotion(const MappingData& data) {
     Common::ParamPackage new_input;
     new_input.Set("engine", data.engine);
-    if (data.pad.guid != Common::UUID{}) {
+    if (data.pad.guid.IsValid()) {
         new_input.Set("guid", data.pad.guid.Format());
     }
     new_input.Set("port", static_cast<int>(data.pad.port));
@@ -3,8 +3,14 @@
 // Refer to the license.txt file included
 
 #pragma once
 
+#include "common/param_package.h"
+#include "common/threadsafe_queue.h"
+
+namespace InputCommon::Polling {
+enum class InputType;
+}
 
 namespace InputCommon {
 class InputEngine;
 struct MappingData;

@@ -20,7 +26,7 @@ public:
     void BeginMapping(Polling::InputType type);
 
     /// Returns an input event with mapping information from the input_queue
-    [[nodiscard]] const Common::ParamPackage GetNextInput();
+    [[nodiscard]] Common::ParamPackage GetNextInput();
 
     /**
      * Registers mapping input data from the driver
@@ -27,7 +27,7 @@ namespace InputCommon {
 struct InputSubsystem::Impl {
     void Initialize() {
         mapping_factory = std::make_shared<MappingFactory>();
-        MappingCallback mapping_callback{[this](MappingData data) { RegisterInput(data); }};
+        MappingCallback mapping_callback{[this](const MappingData& data) { RegisterInput(data); }};
 
         keyboard = std::make_shared<Keyboard>("keyboard");
         keyboard->SetMappingCallback(mapping_callback);

@@ -284,7 +284,7 @@ struct InputSubsystem::Impl {
 #endif
     }
 
-    void RegisterInput(MappingData data) {
+    void RegisterInput(const MappingData& data) {
         mapping_factory->RegisterInput(data);
     }
 

@@ -394,7 +394,7 @@ void InputSubsystem::BeginMapping(Polling::InputType type) {
     impl->mapping_factory->BeginMapping(type);
 }
 
-const Common::ParamPackage InputSubsystem::GetNextInput() const {
+Common::ParamPackage InputSubsystem::GetNextInput() const {
     return impl->mapping_factory->GetNextInput();
 }
 

@@ -126,7 +126,7 @@ public:
     void BeginMapping(Polling::InputType type);
 
     /// Returns an input event with mapping information.
-    [[nodiscard]] const Common::ParamPackage GetNextInput() const;
+    [[nodiscard]] Common::ParamPackage GetNextInput() const;
 
     /// Stop polling from all backends.
     void StopMapping() const;
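Several hunks above drop `const` from by-value return types (GetNextInput, GetPadIdentifier, GetHostUUID). The reason, restated with a small self-contained example: a const prvalue cannot bind to a move constructor, so every call site pays for a copy.

```cpp
// std::string stands in for ParamPackage; the behavior is the same for any
// movable class type.
#include <string>

const std::string MakeConst() {
    return "payload";
}

std::string Make() {
    return "payload";
}

int main() {
    std::string a = MakeConst();  // copies: a const rvalue can't be moved from
    std::string b = Make();       // moves, or the copy is elided entirely
    return static_cast<int>(a.size() + b.size());
}
```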
@@ -372,6 +372,8 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 poin
                                 ScalarU32 value);
 void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
                                 Register value);
+void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                                  Register value);
 void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              ScalarU32 offset, ScalarU32 value);
 void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,

@@ -412,6 +414,24 @@ void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b
                             ScalarU32 offset, Register value);
 void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                                  ScalarU32 offset, Register value);
+void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              ScalarU32 offset, Register value);
+void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, Register value);
+void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              ScalarU32 offset, Register value);
+void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                                   ScalarU32 offset, Register value);
 void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              ScalarU32 offset, ScalarF32 value);
 void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,

@@ -448,6 +468,17 @@ void EmitGlobalAtomicAnd64(EmitContext& ctx);
 void EmitGlobalAtomicOr64(EmitContext& ctx);
 void EmitGlobalAtomicXor64(EmitContext& ctx);
 void EmitGlobalAtomicExchange64(EmitContext& ctx);
+void EmitGlobalAtomicIAdd32x2(EmitContext& ctx);
+void EmitGlobalAtomicSMin32x2(EmitContext& ctx);
+void EmitGlobalAtomicUMin32x2(EmitContext& ctx);
+void EmitGlobalAtomicSMax32x2(EmitContext& ctx);
+void EmitGlobalAtomicUMax32x2(EmitContext& ctx);
+void EmitGlobalAtomicInc32x2(EmitContext& ctx);
+void EmitGlobalAtomicDec32x2(EmitContext& ctx);
+void EmitGlobalAtomicAnd32x2(EmitContext& ctx);
+void EmitGlobalAtomicOr32x2(EmitContext& ctx);
+void EmitGlobalAtomicXor32x2(EmitContext& ctx);
+void EmitGlobalAtomicExchange32x2(EmitContext& ctx);
 void EmitGlobalAtomicAddF32(EmitContext& ctx);
 void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
 void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
|
||||
ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
|
||||
}
|
||||
|
||||
void EmitSharedAtomicExchange32x2([[maybe_unused]] EmitContext& ctx,
|
||||
[[maybe_unused]] IR::Inst& inst,
|
||||
[[maybe_unused]] ScalarU32 pointer_offset,
|
||||
[[maybe_unused]] Register value) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
ScalarU32 offset, ScalarU32 value) {
|
||||
Atom(ctx, inst, binding, offset, value, "ADD", "U32");
|
||||
@@ -411,6 +418,62 @@ void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Val
|
||||
Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
|
||||
}
|
||||
|
||||
void EmitStorageAtomicIAdd32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
||||
[[maybe_unused]] const IR::Value& binding,
|
||||
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitStorageAtomicSMin32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
||||
[[maybe_unused]] const IR::Value& binding,
|
||||
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitStorageAtomicUMin32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
||||
[[maybe_unused]] const IR::Value& binding,
|
||||
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitStorageAtomicSMax32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
||||
[[maybe_unused]] const IR::Value& binding,
|
||||
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitStorageAtomicUMax32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
||||
[[maybe_unused]] const IR::Value& binding,
|
||||
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitStorageAtomicAnd32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
||||
[[maybe_unused]] const IR::Value& binding,
|
||||
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitStorageAtomicOr32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
||||
[[maybe_unused]] const IR::Value& binding,
|
||||
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitStorageAtomicXor32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
|
||||
[[maybe_unused]] const IR::Value& binding,
|
||||
[[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitStorageAtomicExchange32x2([[maybe_unused]] EmitContext& ctx,
|
||||
[[maybe_unused]] IR::Inst& inst,
|
||||
[[maybe_unused]] const IR::Value& binding,
|
||||
[[maybe_unused]] ScalarU32 offset,
|
||||
[[maybe_unused]] Register value) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
|
||||
ScalarU32 offset, ScalarF32 value) {
|
||||
Atom(ctx, inst, binding, offset, value, "ADD", "F32");
|
||||
@@ -537,6 +600,50 @@ void EmitGlobalAtomicExchange64(EmitContext&) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicIAdd32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicSMin32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicUMin32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicSMax32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicUMax32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicInc32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicDec32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicAnd32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicOr32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicXor32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicExchange32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicAddF32(EmitContext&) {
|
||||
throw NotImplementedException("GLASM instruction");
|
||||
}
|
||||
|
||||
@@ -105,6 +105,13 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_vi
                 pointer_offset, value, pointer_offset, value);
 }
 
+void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+                                  std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+    ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, pointer_offset, pointer_offset);
+    ctx.Add("smem[{}>>2]={}.x;smem[({}+4)>>2]={}.y;", pointer_offset, value, pointer_offset, value);
+}
+
 void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value) {
     ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),

@@ -265,6 +272,97 @@ void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Val
                  ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
 }
 
+void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+    ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
+                 binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
+                 ctx.var_alloc.Consume(offset));
+    ctx.Add("{}_ssbo{}[{}>>2]+={}.x;{}_ssbo{}[({}>>2)+1]+={}.y;", ctx.stage_name, binding.U32(),
+            ctx.var_alloc.Consume(offset), value, ctx.stage_name, binding.U32(),
+            ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+    ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
+                 binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
+                 ctx.var_alloc.Consume(offset));
+    ctx.Add("for(int "
+            "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),int({}[i])));}}",
+            ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+            binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+    ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
+                 binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
+                 ctx.var_alloc.Consume(offset));
+    ctx.Add("for(int i=0;i<2;++i){{ "
+            "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],{}[i]);}}",
+            ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+            binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+    ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
+                 binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
+                 ctx.var_alloc.Consume(offset));
+    ctx.Add("for(int "
+            "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),int({}[i])));}}",
+            ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+            binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+    ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
+                 binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
+                 ctx.var_alloc.Consume(offset));
+    ctx.Add("for(int i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],{}[i]);}}",
+            ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+            binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2");
+    ctx.AddU32x2("{}=uvec2(atomicAnd({}_ssbo{}[{}>>2],{}.x),atomicAnd({}_ssbo{}[({}>>2)+1],{}.y));",
+                 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+                 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2");
+    ctx.AddU32x2("{}=uvec2(atomicOr({}_ssbo{}[{}>>2],{}.x),atomicOr({}_ssbo{}[({}>>2)+1],{}.y));",
+                 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+                 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2");
+    ctx.AddU32x2("{}=uvec2(atomicXor({}_ssbo{}[{}>>2],{}.x),atomicXor({}_ssbo{}[({}>>2)+1],{}.y));",
+                 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+                 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                                   const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2");
+    ctx.AddU32x2("{}=uvec2(atomicExchange({}_ssbo{}[{}>>2],{}.x),atomicExchange({}_ssbo{}[({}>>2)+"
+                 "1],{}.y));",
+                 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+                 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
 void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value) {
     SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd");
@@ -388,6 +486,50 @@ void EmitGlobalAtomicExchange64(EmitContext&) {
|
||||
throw NotImplementedException("GLSL Instrucion");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicIAdd32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLSL Instrucion");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicSMin32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLSL Instrucion");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicUMin32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLSL Instrucion");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicSMax32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLSL Instrucion");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicUMax32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLSL Instrucion");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicInc32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLSL Instrucion");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicDec32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLSL Instrucion");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicAnd32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLSL Instrucion");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicOr32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLSL Instrucion");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicXor32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLSL Instrucion");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicExchange32x2(EmitContext&) {
|
||||
throw NotImplementedException("GLSL Instrucion");
|
||||
}
|
||||
|
||||
void EmitGlobalAtomicAddF32(EmitContext&) {
|
||||
throw NotImplementedException("GLSL Instrucion");
|
||||
}
|
||||
|
||||
@@ -442,6 +442,8 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_vi
                                std::string_view value);
void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
                                std::string_view value);
void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
                                  std::string_view value);
void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                             const IR::Value& offset, std::string_view value);
void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -482,6 +484,24 @@ void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b
                            const IR::Value& offset, std::string_view value);
void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                                 const IR::Value& offset, std::string_view value);
void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                               const IR::Value& offset, std::string_view value);
void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                               const IR::Value& offset, std::string_view value);
void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                               const IR::Value& offset, std::string_view value);
void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                               const IR::Value& offset, std::string_view value);
void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                               const IR::Value& offset, std::string_view value);
void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value);
void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                             const IR::Value& offset, std::string_view value);
void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value);
void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                                   const IR::Value& offset, std::string_view value);
void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                             const IR::Value& offset, std::string_view value);
void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -518,6 +538,17 @@ void EmitGlobalAtomicAnd64(EmitContext& ctx);
void EmitGlobalAtomicOr64(EmitContext& ctx);
void EmitGlobalAtomicXor64(EmitContext& ctx);
void EmitGlobalAtomicExchange64(EmitContext& ctx);
void EmitGlobalAtomicIAdd32x2(EmitContext& ctx);
void EmitGlobalAtomicSMin32x2(EmitContext& ctx);
void EmitGlobalAtomicUMin32x2(EmitContext& ctx);
void EmitGlobalAtomicSMax32x2(EmitContext& ctx);
void EmitGlobalAtomicUMax32x2(EmitContext& ctx);
void EmitGlobalAtomicInc32x2(EmitContext& ctx);
void EmitGlobalAtomicDec32x2(EmitContext& ctx);
void EmitGlobalAtomicAnd32x2(EmitContext& ctx);
void EmitGlobalAtomicOr32x2(EmitContext& ctx);
void EmitGlobalAtomicXor32x2(EmitContext& ctx);
void EmitGlobalAtomicExchange32x2(EmitContext& ctx);
void EmitGlobalAtomicAddF32(EmitContext& ctx);
void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
void EmitGlobalAtomicAddF32x2(EmitContext& ctx);

@@ -387,6 +387,14 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
    }
}

void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) {
    if (ctx.runtime_info.xfb_varyings.empty()) {
        return;
    }
    ctx.AddCapability(spv::Capability::TransformFeedback);
    ctx.AddExecutionMode(main_func, spv::ExecutionMode::Xfb);
}

void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) {
    if (info.uses_sampled_1d) {
        ctx.AddCapability(spv::Capability::Sampled1D);
@@ -442,9 +450,6 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
    if (info.uses_sample_id) {
        ctx.AddCapability(spv::Capability::SampleRateShading);
    }
    if (!ctx.runtime_info.xfb_varyings.empty()) {
        ctx.AddCapability(spv::Capability::TransformFeedback);
    }
    if (info.uses_derivatives) {
        ctx.AddCapability(spv::Capability::DerivativeControl);
    }
@@ -484,6 +489,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
        SetupSignedNanCapabilities(profile, program, ctx, main);
    }
    SetupCapabilities(profile, program.info, ctx);
    SetupTransformFeedbackCapabilities(ctx, main);
    PatchPhiNodes(program, ctx);
    return ctx.Assemble();
}

@@ -74,7 +74,7 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value&
        const auto [scope, semantics]{AtomicArgs(ctx)};
        return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
    }
    LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
    const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
                                    binding, offset, sizeof(u32[2]))};
    const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
@@ -82,6 +82,17 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value&
    ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result));
    return original_value;
}

Id StorageAtomicU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
                      Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
    const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
                                    binding, offset, sizeof(u32[2]))};
    const Id original_value{ctx.OpLoad(ctx.U32[2], pointer)};
    const Id result{(ctx.*non_atomic_func)(ctx.U32[2], value, original_value)};
    ctx.OpStore(pointer, result);
    return original_value;
}
} // Anonymous namespace
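
StorageAtomicU32x2 takes a pointer-to-member of Sirit::Module so that each 32x2 wrapper further down can pick the plain (non-atomic) SPIR-V op to apply between the loaded value and the operand. A compilable toy version of that dispatch pattern, with a stand-in Module (everything here is illustrative):

#include <cstdio>

struct Module {
    int OpIAdd(int a, int b) { return a + b; }
    int OpBitwiseAnd(int a, int b) { return a & b; }
};

// Same (ctx.*non_atomic_func)(...) call shape as StorageAtomicU32x2 uses.
int Apply(Module& m, int a, int b, int (Module::*op)(int, int)) {
    return (m.*op)(a, b);
}

int main() {
    Module m;
    std::printf("%d %d\n", Apply(m, 6, 3, &Module::OpIAdd), Apply(m, 6, 3, &Module::OpBitwiseAnd));
    return 0;
}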

Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
@@ -141,7 +152,7 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) {
        const auto [scope, semantics]{AtomicArgs(ctx)};
        return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
    }
    LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
    const Id pointer_1{SharedPointer(ctx, offset, 0)};
    const Id pointer_2{SharedPointer(ctx, offset, 1)};
    const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
@@ -152,6 +163,18 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) {
    return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2));
}

Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id offset, Id value) {
    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
    const Id pointer_1{SharedPointer(ctx, offset, 0)};
    const Id pointer_2{SharedPointer(ctx, offset, 1)};
    const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
    const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)};
    const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)};
    ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U));
    ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U));
    return ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2);
}

Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd);
@@ -267,7 +290,7 @@ Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const
        const auto [scope, semantics]{AtomicArgs(ctx)};
        return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
    }
    LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
    const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
                                    binding, offset, sizeof(u32[2]))};
    const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
@@ -275,6 +298,56 @@ Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const
    return original;
}

Id EmitStorageAtomicIAdd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpIAdd);
}

Id EmitStorageAtomicSMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpSMin);
}

Id EmitStorageAtomicUMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpUMin);
}

Id EmitStorageAtomicSMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpSMax);
}

Id EmitStorageAtomicUMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpUMax);
}

Id EmitStorageAtomicAnd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                            Id value) {
    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseAnd);
}

Id EmitStorageAtomicOr32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseOr);
}

Id EmitStorageAtomicXor32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                            Id value) {
    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseXor);
}

Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding,
                                 const IR::Value& offset, Id value) {
    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
    const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
                                    binding, offset, sizeof(u32[2]))};
    const Id original{ctx.OpLoad(ctx.U32[2], pointer)};
    ctx.OpStore(pointer, value);
    return original;
}

Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id ssbo{ctx.ssbos[binding.U32()].U32};
@@ -418,6 +491,50 @@ Id EmitGlobalAtomicExchange64(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicIAdd32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicSMin32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicUMin32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicSMax32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicUMax32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicInc32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicDec32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicAnd32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicOr32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicXor32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicExchange32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicAddF32(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

@@ -335,6 +335,7 @@ Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value);
Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value);
Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value);
Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value);
Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id pointer_offset, Id value);
Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value);
Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
@@ -375,6 +376,24 @@ Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::
                          Id value);
Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                               Id value);
Id EmitStorageAtomicIAdd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value);
Id EmitStorageAtomicSMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value);
Id EmitStorageAtomicUMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value);
Id EmitStorageAtomicSMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value);
Id EmitStorageAtomicUMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value);
Id EmitStorageAtomicAnd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                            Id value);
Id EmitStorageAtomicOr32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value);
Id EmitStorageAtomicXor32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                            Id value);
Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding,
                                 const IR::Value& offset, Id value);
Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value);
Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
@@ -411,6 +430,17 @@ Id EmitGlobalAtomicAnd64(EmitContext& ctx);
Id EmitGlobalAtomicOr64(EmitContext& ctx);
Id EmitGlobalAtomicXor64(EmitContext& ctx);
Id EmitGlobalAtomicExchange64(EmitContext& ctx);
Id EmitGlobalAtomicIAdd32x2(EmitContext& ctx);
Id EmitGlobalAtomicSMin32x2(EmitContext& ctx);
Id EmitGlobalAtomicUMin32x2(EmitContext& ctx);
Id EmitGlobalAtomicSMax32x2(EmitContext& ctx);
Id EmitGlobalAtomicUMax32x2(EmitContext& ctx);
Id EmitGlobalAtomicInc32x2(EmitContext& ctx);
Id EmitGlobalAtomicDec32x2(EmitContext& ctx);
Id EmitGlobalAtomicAnd32x2(EmitContext& ctx);
Id EmitGlobalAtomicOr32x2(EmitContext& ctx);
Id EmitGlobalAtomicXor32x2(EmitContext& ctx);
Id EmitGlobalAtomicExchange32x2(EmitContext& ctx);
Id EmitGlobalAtomicAddF32(EmitContext& ctx);
Id EmitGlobalAtomicAddF16x2(EmitContext& ctx);
Id EmitGlobalAtomicAddF32x2(EmitContext& ctx);

@@ -118,6 +118,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
    case Opcode::SharedAtomicXor32:
    case Opcode::SharedAtomicExchange32:
    case Opcode::SharedAtomicExchange64:
    case Opcode::SharedAtomicExchange32x2:
    case Opcode::GlobalAtomicIAdd32:
    case Opcode::GlobalAtomicSMin32:
    case Opcode::GlobalAtomicUMin32:
@@ -138,6 +139,15 @@ bool Inst::MayHaveSideEffects() const noexcept {
    case Opcode::GlobalAtomicOr64:
    case Opcode::GlobalAtomicXor64:
    case Opcode::GlobalAtomicExchange64:
    case Opcode::GlobalAtomicIAdd32x2:
    case Opcode::GlobalAtomicSMin32x2:
    case Opcode::GlobalAtomicUMin32x2:
    case Opcode::GlobalAtomicSMax32x2:
    case Opcode::GlobalAtomicUMax32x2:
    case Opcode::GlobalAtomicAnd32x2:
    case Opcode::GlobalAtomicOr32x2:
    case Opcode::GlobalAtomicXor32x2:
    case Opcode::GlobalAtomicExchange32x2:
    case Opcode::GlobalAtomicAddF32:
    case Opcode::GlobalAtomicAddF16x2:
    case Opcode::GlobalAtomicAddF32x2:
@@ -165,6 +175,15 @@ bool Inst::MayHaveSideEffects() const noexcept {
    case Opcode::StorageAtomicOr64:
    case Opcode::StorageAtomicXor64:
    case Opcode::StorageAtomicExchange64:
    case Opcode::StorageAtomicIAdd32x2:
    case Opcode::StorageAtomicSMin32x2:
    case Opcode::StorageAtomicUMin32x2:
    case Opcode::StorageAtomicSMax32x2:
    case Opcode::StorageAtomicUMax32x2:
    case Opcode::StorageAtomicAnd32x2:
    case Opcode::StorageAtomicOr32x2:
    case Opcode::StorageAtomicXor32x2:
    case Opcode::StorageAtomicExchange32x2:
    case Opcode::StorageAtomicAddF32:
    case Opcode::StorageAtomicAddF16x2:
    case Opcode::StorageAtomicAddF32x2:

@@ -341,6 +341,7 @@ OPCODE(SharedAtomicOr32, U32, U32,
OPCODE(SharedAtomicXor32, U32, U32, U32, )
OPCODE(SharedAtomicExchange32, U32, U32, U32, )
OPCODE(SharedAtomicExchange64, U64, U32, U64, )
OPCODE(SharedAtomicExchange32x2, U32x2, U32, U32x2, )

OPCODE(GlobalAtomicIAdd32, U32, U64, U32, )
OPCODE(GlobalAtomicSMin32, U32, U64, U32, )
@@ -362,6 +363,15 @@ OPCODE(GlobalAtomicAnd64, U64, U64,
OPCODE(GlobalAtomicOr64, U64, U64, U64, )
OPCODE(GlobalAtomicXor64, U64, U64, U64, )
OPCODE(GlobalAtomicExchange64, U64, U64, U64, )
OPCODE(GlobalAtomicIAdd32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicSMin32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicUMin32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicSMax32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicUMax32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicAnd32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicOr32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicXor32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicExchange32x2, U32x2, U32x2, U32x2, )
OPCODE(GlobalAtomicAddF32, F32, U64, F32, )
OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, )
OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, )
@@ -390,6 +400,15 @@ OPCODE(StorageAtomicAnd64, U64, U32,
OPCODE(StorageAtomicOr64, U64, U32, U32, U64, )
OPCODE(StorageAtomicXor64, U64, U32, U32, U64, )
OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, )
OPCODE(StorageAtomicIAdd32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicSMin32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicUMin32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicSMax32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicUMax32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicAnd32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicOr32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicXor32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicExchange32x2, U32x2, U32, U32, U32x2, )
OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, )
OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, )
OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, )

@@ -57,16 +57,6 @@ void TranslatorVisitor::VMNMX(u64 insn) {
    if (vmnmx.sat != 0) {
        throw NotImplementedException("VMNMX SAT");
    }
    // Selectors were shown to default to 2 in unit tests
    if (vmnmx.src_a_selector != 2) {
        throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
    }
    if (vmnmx.src_b_selector != 2) {
        throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
    }
    if (vmnmx.src_a_width != VideoWidth::Word) {
        throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
    }

    const bool is_b_imm{vmnmx.is_src_b_reg == 0};
    const IR::U32 src_a{GetReg8(insn)};
@@ -76,10 +66,14 @@ void TranslatorVisitor::VMNMX(u64 insn) {
    const VideoWidth a_width{vmnmx.src_a_width};
    const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};

    const u32 a_selector{static_cast<u32>(vmnmx.src_a_selector)};
    // Immediate values can't have a selector
    const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmnmx.src_b_selector)};

    const bool src_a_signed{vmnmx.src_a_sign != 0};
    const bool src_b_signed{vmnmx.src_b_sign != 0};
    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)};
    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)};
    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};

    // First operation's sign is only dependent on operand b's sign
    const bool op_1_signed{src_b_signed};
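
This change threads the decoded selectors through to operand extraction instead of hard-coding lane 0, and the old guards that rejected any selector other than 2 are dropped. A hedged sketch of what sub-word lane selection means, under assumed semantics (this is not the emulator's ExtractVideoOperandValue):

#include <cstdint>

// lane_bits is 8 or 16 for byte/short operands; selector picks which lane of
// the 32-bit register is extracted, with optional sign extension.
uint32_t ExtractLane(uint32_t reg, uint32_t lane_bits, uint32_t selector, bool is_signed) {
    const uint32_t lane = (reg >> (selector * lane_bits)) & ((1u << lane_bits) - 1u);
    if (is_signed) {
        const uint32_t shift = 32 - lane_bits;
        return static_cast<uint32_t>(static_cast<int32_t>(lane << shift) >> shift);
    }
    return lane;
}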

@@ -21,7 +21,6 @@ namespace Shader::Maxwell {
[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
                                                  Environment& env_vertex_b);

[[nodiscard]] void ConvertLegacyToGeneric(IR::Program& program,
                                          const Shader::RuntimeInfo& runtime_info);
void ConvertLegacyToGeneric(IR::Program& program, const RuntimeInfo& runtime_info);

} // namespace Shader::Maxwell

@@ -360,6 +360,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
    case IR::Opcode::GlobalAtomicOr64:
    case IR::Opcode::GlobalAtomicXor64:
    case IR::Opcode::GlobalAtomicExchange64:
    case IR::Opcode::GlobalAtomicIAdd32x2:
    case IR::Opcode::GlobalAtomicSMin32x2:
    case IR::Opcode::GlobalAtomicUMin32x2:
    case IR::Opcode::GlobalAtomicSMax32x2:
    case IR::Opcode::GlobalAtomicUMax32x2:
    case IR::Opcode::GlobalAtomicAnd32x2:
    case IR::Opcode::GlobalAtomicOr32x2:
    case IR::Opcode::GlobalAtomicXor32x2:
    case IR::Opcode::GlobalAtomicExchange32x2:
    case IR::Opcode::GlobalAtomicAddF32:
    case IR::Opcode::GlobalAtomicAddF16x2:
    case IR::Opcode::GlobalAtomicAddF32x2:
@@ -597,6 +606,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
        break;
    case IR::Opcode::LoadStorage64:
    case IR::Opcode::WriteStorage64:
    case IR::Opcode::StorageAtomicIAdd32x2:
    case IR::Opcode::StorageAtomicSMin32x2:
    case IR::Opcode::StorageAtomicUMin32x2:
    case IR::Opcode::StorageAtomicSMax32x2:
    case IR::Opcode::StorageAtomicUMax32x2:
    case IR::Opcode::StorageAtomicAnd32x2:
    case IR::Opcode::StorageAtomicOr32x2:
    case IR::Opcode::StorageAtomicXor32x2:
    case IR::Opcode::StorageAtomicExchange32x2:
        info.used_storage_buffer_types |= IR::Type::U32x2;
        break;
    case IR::Opcode::LoadStorage128:
@@ -688,7 +706,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
    case IR::Opcode::StorageAtomicAnd64:
    case IR::Opcode::StorageAtomicOr64:
    case IR::Opcode::StorageAtomicXor64:
        info.used_storage_buffer_types |= IR::Type::U64;
        info.used_storage_buffer_types |= IR::Type::U64 | IR::Type::U32x2;
        info.uses_int64_bit_atomics = true;
        break;
    case IR::Opcode::BindlessImageAtomicIAdd32:

@@ -92,6 +92,15 @@ bool IsGlobalMemory(const IR::Inst& inst) {
    case IR::Opcode::GlobalAtomicOr64:
    case IR::Opcode::GlobalAtomicXor64:
    case IR::Opcode::GlobalAtomicExchange64:
    case IR::Opcode::GlobalAtomicIAdd32x2:
    case IR::Opcode::GlobalAtomicSMin32x2:
    case IR::Opcode::GlobalAtomicUMin32x2:
    case IR::Opcode::GlobalAtomicSMax32x2:
    case IR::Opcode::GlobalAtomicUMax32x2:
    case IR::Opcode::GlobalAtomicAnd32x2:
    case IR::Opcode::GlobalAtomicOr32x2:
    case IR::Opcode::GlobalAtomicXor32x2:
    case IR::Opcode::GlobalAtomicExchange32x2:
    case IR::Opcode::GlobalAtomicAddF32:
    case IR::Opcode::GlobalAtomicAddF16x2:
    case IR::Opcode::GlobalAtomicAddF32x2:
@@ -135,6 +144,15 @@ bool IsGlobalMemoryWrite(const IR::Inst& inst) {
    case IR::Opcode::GlobalAtomicOr64:
    case IR::Opcode::GlobalAtomicXor64:
    case IR::Opcode::GlobalAtomicExchange64:
    case IR::Opcode::GlobalAtomicIAdd32x2:
    case IR::Opcode::GlobalAtomicSMin32x2:
    case IR::Opcode::GlobalAtomicUMin32x2:
    case IR::Opcode::GlobalAtomicSMax32x2:
    case IR::Opcode::GlobalAtomicUMax32x2:
    case IR::Opcode::GlobalAtomicAnd32x2:
    case IR::Opcode::GlobalAtomicOr32x2:
    case IR::Opcode::GlobalAtomicXor32x2:
    case IR::Opcode::GlobalAtomicExchange32x2:
    case IR::Opcode::GlobalAtomicAddF32:
    case IR::Opcode::GlobalAtomicAddF16x2:
    case IR::Opcode::GlobalAtomicAddF32x2:
@@ -199,6 +217,8 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
        return IR::Opcode::StorageAtomicOr32;
    case IR::Opcode::GlobalAtomicXor32:
        return IR::Opcode::StorageAtomicXor32;
    case IR::Opcode::GlobalAtomicExchange32:
        return IR::Opcode::StorageAtomicExchange32;
    case IR::Opcode::GlobalAtomicIAdd64:
        return IR::Opcode::StorageAtomicIAdd64;
    case IR::Opcode::GlobalAtomicSMin64:
@@ -215,10 +235,26 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
        return IR::Opcode::StorageAtomicOr64;
    case IR::Opcode::GlobalAtomicXor64:
        return IR::Opcode::StorageAtomicXor64;
    case IR::Opcode::GlobalAtomicExchange32:
        return IR::Opcode::StorageAtomicExchange32;
    case IR::Opcode::GlobalAtomicExchange64:
        return IR::Opcode::StorageAtomicExchange64;
    case IR::Opcode::GlobalAtomicIAdd32x2:
        return IR::Opcode::StorageAtomicIAdd32x2;
    case IR::Opcode::GlobalAtomicSMin32x2:
        return IR::Opcode::StorageAtomicSMin32x2;
    case IR::Opcode::GlobalAtomicUMin32x2:
        return IR::Opcode::StorageAtomicUMin32x2;
    case IR::Opcode::GlobalAtomicSMax32x2:
        return IR::Opcode::StorageAtomicSMax32x2;
    case IR::Opcode::GlobalAtomicUMax32x2:
        return IR::Opcode::StorageAtomicUMax32x2;
    case IR::Opcode::GlobalAtomicAnd32x2:
        return IR::Opcode::StorageAtomicAnd32x2;
    case IR::Opcode::GlobalAtomicOr32x2:
        return IR::Opcode::StorageAtomicOr32x2;
    case IR::Opcode::GlobalAtomicXor32x2:
        return IR::Opcode::StorageAtomicXor32x2;
    case IR::Opcode::GlobalAtomicExchange32x2:
        return IR::Opcode::StorageAtomicExchange32x2;
    case IR::Opcode::GlobalAtomicAddF32:
        return IR::Opcode::StorageAtomicAddF32;
    case IR::Opcode::GlobalAtomicAddF16x2:
@@ -454,6 +490,15 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
    case IR::Opcode::GlobalAtomicOr64:
    case IR::Opcode::GlobalAtomicXor64:
    case IR::Opcode::GlobalAtomicExchange64:
    case IR::Opcode::GlobalAtomicIAdd32x2:
    case IR::Opcode::GlobalAtomicSMin32x2:
    case IR::Opcode::GlobalAtomicUMin32x2:
    case IR::Opcode::GlobalAtomicSMax32x2:
    case IR::Opcode::GlobalAtomicUMax32x2:
    case IR::Opcode::GlobalAtomicAnd32x2:
    case IR::Opcode::GlobalAtomicOr32x2:
    case IR::Opcode::GlobalAtomicXor32x2:
    case IR::Opcode::GlobalAtomicExchange32x2:
    case IR::Opcode::GlobalAtomicAddF32:
    case IR::Opcode::GlobalAtomicAddF16x2:
    case IR::Opcode::GlobalAtomicAddF32x2:

@@ -199,6 +199,26 @@ void Lower(IR::Block& block, IR::Inst& inst) {
        return ShiftRightLogical64To32(block, inst);
    case IR::Opcode::ShiftRightArithmetic64:
        return ShiftRightArithmetic64To32(block, inst);
    case IR::Opcode::SharedAtomicExchange64:
        return inst.ReplaceOpcode(IR::Opcode::SharedAtomicExchange32x2);
    case IR::Opcode::GlobalAtomicIAdd64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicIAdd32x2);
    case IR::Opcode::GlobalAtomicSMin64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMin32x2);
    case IR::Opcode::GlobalAtomicUMin64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMin32x2);
    case IR::Opcode::GlobalAtomicSMax64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMax32x2);
    case IR::Opcode::GlobalAtomicUMax64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMax32x2);
    case IR::Opcode::GlobalAtomicAnd64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicAnd32x2);
    case IR::Opcode::GlobalAtomicOr64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicOr32x2);
    case IR::Opcode::GlobalAtomicXor64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicXor32x2);
    case IR::Opcode::GlobalAtomicExchange64:
        return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicExchange32x2);
    default:
        break;
    }
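
The lowering pass above rewrites each 64-bit atomic opcode in place to its 32x2 counterpart when the target lacks 64-bit atomics; the operands are left untouched, and the backends then emit the fallbacks shown earlier. A toy version of the same table-driven rewrite (the enum and names are illustrative):

#include <unordered_map>

enum class Op { GlobalAtomicIAdd64, GlobalAtomicIAdd32x2, GlobalAtomicXor64, GlobalAtomicXor32x2 };

Op Lower64To32x2(Op op) {
    static const std::unordered_map<Op, Op> table{
        {Op::GlobalAtomicIAdd64, Op::GlobalAtomicIAdd32x2},
        {Op::GlobalAtomicXor64, Op::GlobalAtomicXor32x2},
    };
    const auto it = table.find(op);
    return it != table.end() ? it->second : op; // unmatched opcodes pass through
}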

@@ -1474,6 +1474,8 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
            // When this memory region has been joined a bunch of times, we assume it's being used
            // as a stream buffer. Increase the size to skip constantly recreating buffers.
            has_stream_leap = true;
            begin -= PAGE_SIZE * 256;
            cpu_addr = begin;
            end += PAGE_SIZE * 256;
        }
    }
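
For a rough sense of scale: assuming the buffer cache's PAGE_SIZE is the common 4 KiB (an assumption; the real constant is defined elsewhere in the cache), each leap widens the region by 1 MiB on each side:

#include <cstddef>
#include <cstdio>

int main() {
    constexpr std::size_t page_size = 4096;       // assumed for illustration
    constexpr std::size_t leap = page_size * 256; // bytes added per side
    std::printf("leap per side: %zu KiB\n", leap / 1024); // prints 1024 KiB
    return 0;
}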

@@ -240,7 +240,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
        ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());

    // Execute the current macro.
    macro_engine->Execute(*this, macro_positions[entry], parameters);
    macro_engine->Execute(macro_positions[entry], parameters);
    if (mme_draw.current_mode != MMEDrawMode::Undefined) {
        FlushMMEInlineDraw();
    }

@@ -12,9 +12,6 @@
#include "video_core/framebuffer_config.h"

namespace Core {
namespace Frontend {
class EmuWindow;
}
class System;
} // namespace Core

@@ -25,7 +22,6 @@ class ShaderNotify;

namespace Tegra {
class DmaPusher;
class CDmaPusher;
struct CommandList;

enum class RenderTargetFormat : u32 {
@@ -88,15 +84,9 @@ enum class DepthFormat : u32 {
    D32_FLOAT_S8X24_UINT = 0x19,
};

struct CommandListHeader;
class DebugContext;

namespace Engines {
class Fermi2D;
class Maxwell3D;
class MaxwellDMA;
class KeplerCompute;
class KeplerMemory;
} // namespace Engines

enum class EngineID {
@@ -190,12 +180,6 @@ public:
    /// Returns a const reference to the GPU DMA pusher.
    [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const;

    /// Returns a reference to the GPU CDMA pusher.
    [[nodiscard]] Tegra::CDmaPusher& CDmaPusher();

    /// Returns a const reference to the GPU CDMA pusher.
    [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const;

    /// Returns a reference to the underlying renderer.
    [[nodiscard]] VideoCore::RendererBase& Renderer();

@@ -2,12 +2,13 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <cstring>
#include <optional>

#include <boost/container_hash/hash.hpp>

#include "common/assert.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/macro/macro.h"
#include "video_core/macro/macro_hle.h"
#include "video_core/macro/macro_interpreter.h"
@@ -24,8 +25,7 @@ void MacroEngine::AddCode(u32 method, u32 data) {
    uploaded_macro_code[method].push_back(data);
}

void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
                          const std::vector<u32>& parameters) {
void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
    auto compiled_macro = macro_cache.find(method);
    if (compiled_macro != macro_cache.end()) {
        const auto& cache_info = compiled_macro->second;
@@ -66,10 +66,9 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
            cache_info.lle_program = Compile(code);
        }

        auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
        if (hle_program.has_value()) {
        if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) {
            cache_info.has_hle_program = true;
            cache_info.hle_program = std::move(hle_program.value());
            cache_info.hle_program = std::move(hle_program);
            cache_info.hle_program->Execute(parameters, method);
        } else {
            cache_info.lle_program->Execute(parameters, method);

@@ -119,7 +119,7 @@ public:
    void AddCode(u32 method, u32 data);

    // Compiles the macro if it's not in the cache, and executes the compiled macro
    void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters);
    void Execute(u32 method, const std::vector<u32>& parameters);

protected:
    virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;

@@ -5,12 +5,15 @@
#include <array>
#include <vector>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/macro/macro.h"
#include "video_core/macro/macro_hle.h"
#include "video_core/rasterizer_interface.h"

namespace Tegra {

namespace {

using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);

// HLE'd functions
void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
    const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
@@ -77,7 +80,6 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
    maxwell3d.CallMethodFromMME(0x8e5, 0x0);
    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
} // Anonymous namespace

constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
    {0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
@@ -85,25 +87,31 @@ constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
    {0x0217920100488FF7, &HLE_0217920100488FF7},
}};

class HLEMacroImpl final : public CachedMacro {
public:
    explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_)
        : maxwell3d{maxwell3d_}, func{func_} {}

    void Execute(const std::vector<u32>& parameters, u32 method) override {
        func(maxwell3d, parameters);
    }

private:
    Engines::Maxwell3D& maxwell3d;
    HLEFunction func;
};
} // Anonymous namespace

HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
HLEMacro::~HLEMacro() = default;

std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const {
    const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(),
                                 [hash](const auto& pair) { return pair.first == hash; });
    if (it == hle_funcs.end()) {
        return std::nullopt;
        return nullptr;
    }
    return std::make_unique<HLEMacroImpl>(maxwell3d, it->second);
}
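
The return-type change works because std::unique_ptr is already nullable, so wrapping it in std::optional was redundant; the caller in MacroEngine::Execute can now bind and test the result in a single if statement. A minimal sketch of the idiom (stub types, illustrative):

#include <memory>

struct CachedMacroStub {
    virtual ~CachedMacroStub() = default;
};

std::unique_ptr<CachedMacroStub> Lookup(bool found) {
    return found ? std::make_unique<CachedMacroStub>() : nullptr;
}

void Use() {
    if (auto program = Lookup(true)) { // if-with-initializer, as in MacroEngine::Execute
        // program is non-null here and its ownership can be moved into a cache entry
    }
}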

HLEMacroImpl::~HLEMacroImpl() = default;

HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_)
    : maxwell3d{maxwell3d_}, func{func_} {}

void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
    func(maxwell3d, parameters);
}

} // namespace Tegra

@@ -5,10 +5,7 @@
#pragma once

#include <memory>
#include <optional>
#include <vector>
#include "common/common_types.h"
#include "video_core/macro/macro.h"

namespace Tegra {

@@ -16,29 +13,17 @@ namespace Engines {
class Maxwell3D;
}

using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);

class HLEMacro {
public:
    explicit HLEMacro(Engines::Maxwell3D& maxwell3d_);
    ~HLEMacro();

    std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
    // Allocates and returns a cached macro if the hash matches a known function.
    // Returns nullptr otherwise.
    [[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const;

private:
    Engines::Maxwell3D& maxwell3d;
};

class HLEMacroImpl : public CachedMacro {
public:
    explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func);
    ~HLEMacroImpl();

    void Execute(const std::vector<u32>& parameters, u32 method) override;

private:
    Engines::Maxwell3D& maxwell3d;
    HLEFunction func;
};

} // namespace Tegra

@@ -2,6 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <array>
#include <optional>

#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
@@ -11,16 +14,81 @@
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));

namespace Tegra {
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_)
    : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
namespace {
class MacroInterpreterImpl final : public CachedMacro {
public:
    explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
        : maxwell3d{maxwell3d_}, code{code_} {}

std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
    return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
}
    void Execute(const std::vector<u32>& params, u32 method) override;

MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_,
                                           const std::vector<u32>& code_)
    : maxwell3d{maxwell3d_}, code{code_} {}
private:
    /// Resets the execution engine state, zeroing registers, etc.
    void Reset();

    /**
     * Executes a single macro instruction located at the current program counter. Returns whether
     * the interpreter should keep running.
     *
     * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
     * previous instruction.
     */
    bool Step(bool is_delay_slot);

    /// Calculates the result of an ALU operation. src_a OP src_b;
    u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);

    /// Performs the result operation on the input result and stores it in the specified register
    /// (if necessary).
    void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);

    /// Evaluates the branch condition and returns whether the branch should be taken or not.
    bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;

    /// Reads an opcode at the current program counter location.
    Macro::Opcode GetOpcode() const;

    /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
    u32 GetRegister(u32 register_id) const;

    /// Sets the register to the input value.
    void SetRegister(u32 register_id, u32 value);

    /// Sets the method address to use for the next Send instruction.
    void SetMethodAddress(u32 address);

    /// Calls a GPU Engine method with the input parameter.
    void Send(u32 value);

    /// Reads a GPU register located at the method address.
    u32 Read(u32 method) const;

    /// Returns the next parameter in the parameter queue.
    u32 FetchParameter();

    Engines::Maxwell3D& maxwell3d;

    /// Current program counter
    u32 pc{};
    /// Program counter to execute at after the delay slot is executed.
    std::optional<u32> delayed_pc;

    /// General purpose macro registers.
    std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};

    /// Method address to use for the next Send instruction.
    Macro::MethodAddress method_address = {};

    /// Input parameters of the current macro.
    std::unique_ptr<u32[]> parameters;
    std::size_t num_parameters = 0;
    std::size_t parameters_capacity = 0;
    /// Index of the next parameter that will be fetched by the 'parm' instruction.
    u32 next_parameter_index = 0;

    bool carry_flag = false;
    const std::vector<u32>& code;
};

void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) {
    MICROPROFILE_SCOPE(MacroInterp);
@@ -283,5 +351,13 @@ u32 MacroInterpreterImpl::FetchParameter() {
    ASSERT(next_parameter_index < num_parameters);
    return parameters[next_parameter_index++];
}
} // Anonymous namespace

MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_)
    : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}

std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
    return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
}

} // namespace Tegra
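
The interpreter restructuring follows the same pattern as the HLE macro change: the concrete CachedMacro implementation moves into an anonymous namespace in the .cpp, and the header keeps only the engine class with its Compile factory. A skeleton of the pattern (stub types, illustrative):

#include <memory>
#include <vector>

struct CachedMacroStub {
    virtual ~CachedMacroStub() = default;
};

namespace {
class InterpreterImpl final : public CachedMacroStub {};
} // Anonymous namespace

// The factory is the only symbol a header would need to expose.
std::unique_ptr<CachedMacroStub> CompileInterpreter(const std::vector<unsigned>& /*code*/) {
    return std::make_unique<InterpreterImpl>();
}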

@@ -3,10 +3,9 @@
// Refer to the license.txt file included.

#pragma once
#include <array>
#include <optional>

#include <vector>
#include "common/bit_field.h"

#include "common/common_types.h"
#include "video_core/macro/macro.h"

@@ -26,77 +25,4 @@ private:
    Engines::Maxwell3D& maxwell3d;
};

class MacroInterpreterImpl : public CachedMacro {
public:
    explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
    void Execute(const std::vector<u32>& params, u32 method) override;

private:
    /// Resets the execution engine state, zeroing registers, etc.
    void Reset();

    /**
     * Executes a single macro instruction located at the current program counter. Returns whether
     * the interpreter should keep running.
     *
     * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
     * previous instruction.
     */
    bool Step(bool is_delay_slot);

    /// Calculates the result of an ALU operation. src_a OP src_b;
    u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);

    /// Performs the result operation on the input result and stores it in the specified register
    /// (if necessary).
    void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);

    /// Evaluates the branch condition and returns whether the branch should be taken or not.
    bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;

    /// Reads an opcode at the current program counter location.
    Macro::Opcode GetOpcode() const;

    /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
    u32 GetRegister(u32 register_id) const;

    /// Sets the register to the input value.
    void SetRegister(u32 register_id, u32 value);

    /// Sets the method address to use for the next Send instruction.
    void SetMethodAddress(u32 address);

    /// Calls a GPU Engine method with the input parameter.
    void Send(u32 value);

    /// Reads a GPU register located at the method address.
    u32 Read(u32 method) const;

    /// Returns the next parameter in the parameter queue.
    u32 FetchParameter();

    Engines::Maxwell3D& maxwell3d;

    /// Current program counter
    u32 pc;
    /// Program counter to execute at after the delay slot is executed.
    std::optional<u32> delayed_pc;

    /// General purpose macro registers.
    std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};

    /// Method address to use for the next Send instruction.
    Macro::MethodAddress method_address = {};

    /// Input parameters of the current macro.
    std::unique_ptr<u32[]> parameters;
    std::size_t num_parameters = 0;
    std::size_t parameters_capacity = 0;
    /// Index of the next parameter that will be fetched by the 'parm' instruction.
    u32 next_parameter_index = 0;

    bool carry_flag = false;
    const std::vector<u32>& code;
};

} // namespace Tegra

@@ -2,9 +2,17 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <array>
#include <bitset>
#include <optional>

#include <xbyak/xbyak.h>

#include "common/assert.h"
#include "common/bit_field.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/x64/xbyak_abi.h"
#include "common/x64/xbyak_util.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/macro/macro_interpreter.h"
@@ -14,13 +22,14 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));

namespace Tegra {
namespace {
constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp;
constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;

static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
    STATE,
    RESULT,
    PARAMETERS,
@@ -28,19 +37,75 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
    BRANCH_HOLDER,
});

MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
    : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
// Arbitrarily chosen based on current booting games.
constexpr size_t MAX_CODE_SIZE = 0x10000;

std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
    return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
std::bitset<32> PersistentCallerSavedRegs() {
    return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED;
}
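
PersistentCallerSavedRegs intersects the JIT's persistent register set with the ABI's caller-saved set: only registers that are both held live across a call and clobberable by the callee need to be pushed and popped around Send. A toy check of the mask logic (the bit patterns are made up; the real sets come from xbyak_abi):

#include <bitset>
#include <cstdio>

int main() {
    const std::bitset<32> persistent{0b1011'0000};   // registers the JIT keeps live
    const std::bitset<32> caller_saved{0b0011'1111}; // registers a callee may clobber
    const std::bitset<32> to_save = persistent & caller_saved;
    std::printf("%zu register(s) need saving\n", to_save.count());
    return 0;
}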

MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
    : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} {
    Compile();
}
class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro {
public:
    explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
        : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} {
        Compile();
    }

MacroJITx64Impl::~MacroJITx64Impl() = default;
    void Execute(const std::vector<u32>& parameters, u32 method) override;

    void Compile_ALU(Macro::Opcode opcode);
    void Compile_AddImmediate(Macro::Opcode opcode);
    void Compile_ExtractInsert(Macro::Opcode opcode);
    void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
    void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
    void Compile_Read(Macro::Opcode opcode);
    void Compile_Branch(Macro::Opcode opcode);

private:
    void Optimizer_ScanFlags();

    void Compile();
    bool Compile_NextInstruction();

    Xbyak::Reg32 Compile_FetchParameter();
    Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);

    void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
    void Compile_Send(Xbyak::Reg32 value);

    Macro::Opcode GetOpCode() const;

    struct JITState {
        Engines::Maxwell3D* maxwell3d{};
        std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
        u32 carry_flag{};
    };
    static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
    using ProgramType = void (*)(JITState*, const u32*);

    struct OptimizerState {
        bool can_skip_carry{};
        bool has_delayed_pc{};
        bool zero_reg_skip{};
        bool skip_dummy_addimmediate{};
        bool optimize_for_method_move{};
        bool enable_asserts{};
    };
    OptimizerState optimizer{};

    std::optional<Macro::Opcode> next_opcode{};
    ProgramType program{nullptr};

    std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
    Xbyak::Label end_of_code{};

    bool is_delay_slot{};
    u32 pc{};

    const std::vector<u32>& code;
    Engines::Maxwell3D& maxwell3d;
};
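
The static_assert in JITState is load-bearing: the generated code keeps a JITState* in the STATE register and fetches the Maxwell3D* with a single mov from qword[STATE], which only reads the right field while maxwell3d stays at offset 0. A standalone illustration of the invariant (stub types):

#include <cstddef>

struct Maxwell3DStub {};

struct JITStateSketch {
    Maxwell3DStub* maxwell3d{}; // must remain the first member for the zero-offset load
    unsigned registers[8]{};
    unsigned carry_flag{};
};

static_assert(offsetof(JITStateSketch, maxwell3d) == 0, "Maxwell3D is not at 0x0");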
|
||||
|
||||
void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
|
||||
MICROPROFILE_SCOPE(MacroJitExecute);
|
||||
@@ -307,11 +372,11 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
|
||||
Compile_ProcessResult(opcode.result_operation, opcode.dst);
|
||||
}
|
||||
|
||||
static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
|
||||
void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
|
||||
maxwell3d->CallMethodFromMME(method_address.address, value);
|
||||
}
|
||||
|
||||
void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
|
||||
void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
|
||||
Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
|
||||
mov(Common::X64::ABI_PARAM1, qword[STATE]);
|
||||
mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
|
||||
@@ -338,7 +403,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
|
||||
L(dont_process);
|
||||
}
|
||||
|
||||
void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
|
||||
void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
|
||||
ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
|
||||
const s32 jump_address =
|
||||
static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));
|
||||
@@ -392,7 +457,7 @@ void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
|
||||
L(end);
|
||||
}
|
||||
|
||||
void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() {
|
||||
void MacroJITx64Impl::Optimizer_ScanFlags() {
|
||||
optimizer.can_skip_carry = true;
|
||||
optimizer.has_delayed_pc = false;
|
||||
for (auto raw_op : code) {
|
||||
@@ -534,7 +599,7 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
|
||||
return true;
|
||||
}
|
||||
|
||||
Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
|
||||
Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
|
||||
mov(eax, dword[PARAMETERS]);
|
||||
add(PARAMETERS, sizeof(u32));
|
||||
return eax;
|
||||
@@ -611,9 +676,12 @@ Macro::Opcode MacroJITx64Impl::GetOpCode() const {
|
||||
ASSERT(pc < code.size());
|
||||
return {code[pc]};
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const {
|
||||
return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED;
|
||||
MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
|
||||
: MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
|
||||
|
||||
std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
|
||||
return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
|
||||
}
|
||||
|
||||
} // namespace Tegra
|
||||
|
||||
@@ -4,12 +4,7 @@

#pragma once

-#include <array>
-#include <bitset>
-#include <xbyak/xbyak.h>
-#include "common/bit_field.h"
#include "common/common_types.h"
-#include "common/x64/xbyak_abi.h"
#include "video_core/macro/macro.h"

namespace Tegra {
@@ -18,9 +13,6 @@ namespace Engines {
class Maxwell3D;
}

-/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games
-constexpr size_t MAX_CODE_SIZE = 0x10000;
-
class MacroJITx64 final : public MacroEngine {
public:
    explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_);
@@ -32,67 +24,4 @@ private:
    Engines::Maxwell3D& maxwell3d;
};
-
-class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro {
-public:
-    explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
-    ~MacroJITx64Impl();
-
-    void Execute(const std::vector<u32>& parameters, u32 method) override;
-
-    void Compile_ALU(Macro::Opcode opcode);
-    void Compile_AddImmediate(Macro::Opcode opcode);
-    void Compile_ExtractInsert(Macro::Opcode opcode);
-    void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
-    void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
-    void Compile_Read(Macro::Opcode opcode);
-    void Compile_Branch(Macro::Opcode opcode);
-
-private:
-    void Optimizer_ScanFlags();
-
-    void Compile();
-    bool Compile_NextInstruction();
-
-    Xbyak::Reg32 Compile_FetchParameter();
-    Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
-
-    void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
-    void Compile_Send(Xbyak::Reg32 value);
-
-    Macro::Opcode GetOpCode() const;
-    std::bitset<32> PersistentCallerSavedRegs() const;
-
-    struct JITState {
-        Engines::Maxwell3D* maxwell3d{};
-        std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
-        u32 carry_flag{};
-    };
-    static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
-    using ProgramType = void (*)(JITState*, const u32*);
-
-    struct OptimizerState {
-        bool can_skip_carry{};
-        bool has_delayed_pc{};
-        bool zero_reg_skip{};
-        bool skip_dummy_addimmediate{};
-        bool optimize_for_method_move{};
-        bool enable_asserts{};
-    };
-    OptimizerState optimizer{};
-
-    std::optional<Macro::Opcode> next_opcode{};
-    ProgramType program{nullptr};
-
-    std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
-    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
-    Xbyak::Label end_of_code{};
-
-    bool is_delay_slot{};
-    u32 pc{};
-    std::optional<u32> delayed_pc;
-
-    const std::vector<u32>& code;
-    Engines::Maxwell3D& maxwell3d;
-};

} // namespace Tegra
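One invariant worth noting from the declarations being deleted here is the `static_assert` on `JITState`: the generated code loads the engine pointer straight through the state pointer (`mov(Common::X64::ABI_PARAM1, qword[STATE])` in the .cpp hunks above), which is only correct while that member sits at offset zero. A self-contained sketch of the invariant:

#include <cstddef>

// Stand-in for the JIT's state block. The first member is fetched with a
// single `mov reg, qword [state_ptr]`, so it must live at offset 0 -- the
// static_assert turns a silent miscompile into a build error.
struct JITState {
    void* maxwell3d;
    unsigned registers[8];
    unsigned carry_flag;
};
static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");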
@@ -214,7 +214,7 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im

    {
        VkImageMemoryBarrier fsr_write_barrier = base_barrier;
-       fsr_write_barrier.image = *images[image_index],
+       fsr_write_barrier.image = *images[image_index];
        fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;

        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
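The one-character fix above replaces a trailing comma with a semicolon. The old line still compiled because C++'s comma operator fused it with the following assignment into a single expression statement, with identical behavior in this case, so this is a latent-footgun cleanup rather than a functional change. A sketch of the pitfall in plain C++:

struct Barrier {
    int image;
    int old_layout;
};

int main() {
    Barrier b{};
    // Comma operator: both assignments run, left to right, as ONE statement.
    // Easy to miss in review, and it breaks silently if someone later
    // inserts code between what look like two separate statements.
    b.image = 1,
    b.old_layout = 2;
    return b.image + b.old_layout; // 3, same as with `;` -- here.
}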
@@ -393,6 +393,8 @@ void Config::ReadControlValues() {
    ReadGlobalSetting(Settings::values.enable_accurate_vibrations);
    ReadGlobalSetting(Settings::values.motion_enabled);

+   ReadBasicSetting(Settings::values.controller_navigation);
+
    qt_config->endGroup();
}

@@ -1001,6 +1003,7 @@ void Config::SaveControlValues() {
    WriteBasicSetting(Settings::values.keyboard_enabled);
    WriteBasicSetting(Settings::values.emulate_analog_keyboard);
    WriteBasicSetting(Settings::values.mouse_panning_sensitivity);
+   WriteBasicSetting(Settings::values.controller_navigation);

    WriteBasicSetting(Settings::values.tas_enable);
    WriteBasicSetting(Settings::values.tas_loop);
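A setting such as `controller_navigation` only round-trips if both the read and the save paths are updated; missing either half yields a value that silently resets on restart. A minimal sketch of the paired pattern, with a `std::map` standing in for the Qt-backed `qt_config`:

#include <map>
#include <string>

struct BasicSetting {
    std::string key;
    bool value = false;
    bool default_value = false;
};

std::map<std::string, bool> store; // stand-in for qt_config

// Read side: fall back to the default when the key was never written.
void ReadBasicSetting(BasicSetting& s) {
    const auto it = store.find(s.key);
    s.value = it != store.end() ? it->second : s.default_value;
}

// Save side: persist the current value under the same key.
void WriteBasicSetting(const BasicSetting& s) {
    store[s.key] = s.value;
}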
@@ -429,7 +429,7 @@
     </item>
     <item>
      <property name="text">
-      <string>AMD FidelityFX™️ Super Resolution [Vulkan Only]</string>
+      <string>AMD FidelityFX™️ Super Resolution (Vulkan Only)</string>
      </property>
     </item>
    </widget>
@@ -131,6 +131,7 @@ void ConfigureInputAdvanced::ApplyConfiguration() {
    Settings::values.touchscreen.enabled = ui->touchscreen_enabled->isChecked();
    Settings::values.enable_raw_input = ui->enable_raw_input->isChecked();
    Settings::values.enable_udp_controller = ui->enable_udp_controller->isChecked();
+   Settings::values.controller_navigation = ui->controller_navigation->isChecked();
}

void ConfigureInputAdvanced::LoadConfiguration() {
@@ -162,6 +163,7 @@ void ConfigureInputAdvanced::LoadConfiguration() {
    ui->touchscreen_enabled->setChecked(Settings::values.touchscreen.enabled);
    ui->enable_raw_input->setChecked(Settings::values.enable_raw_input.GetValue());
    ui->enable_udp_controller->setChecked(Settings::values.enable_udp_controller.GetValue());
+   ui->controller_navigation->setChecked(Settings::values.controller_navigation.GetValue());

    UpdateUIEnabled();
}
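The same symmetry holds at the dialog level: `ApplyConfiguration` pushes the checkbox state into the setting and `LoadConfiguration` pulls it back out. A hedged, Qt-free sketch of the two-way binding (all names illustrative):

struct CheckBox {
    bool checked = false;
    bool isChecked() const { return checked; }
    void setChecked(bool value) { checked = value; }
};

struct Dialog {
    CheckBox controller_navigation;
    bool* setting = nullptr; // would point at the persisted setting

    // UI -> setting when the user confirms the dialog.
    void ApplyConfiguration() { *setting = controller_navigation.isChecked(); }
    // Setting -> UI when the dialog opens.
    void LoadConfiguration() { controller_navigation.setChecked(*setting); }
};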
@@ -2655,6 +2655,19 @@
       </widget>
      </item>
+     <item row="4" column="0">
+      <widget class="QCheckBox" name="controller_navigation">
+       <property name="minimumSize">
+        <size>
+         <width>0</width>
+         <height>23</height>
+        </size>
+       </property>
+       <property name="text">
+        <string>Controller navigation</string>
+       </property>
+      </widget>
+     </item>
      <item row="5" column="0">
       <widget class="QCheckBox" name="mouse_panning">
        <property name="minimumSize">
         <size>
@@ -2667,7 +2680,7 @@
        </property>
       </widget>
      </item>
-     <item row="4" column="2">
+     <item row="5" column="2">
       <widget class="QSpinBox" name="mouse_panning_sensitivity">
        <property name="toolTip">
         <string>Mouse sensitivity</string>
@@ -2689,14 +2702,14 @@
        </property>
       </widget>
      </item>
-     <item row="5" column="0">
+     <item row="6" column="0">
       <widget class="QLabel" name="motion_touch">
        <property name="text">
         <string>Motion / Touch</string>
        </property>
       </widget>
      </item>
-     <item row="5" column="2">
+     <item row="6" column="2">
       <widget class="QPushButton" name="buttonMotionTouch">
        <property name="text">
         <string>Configure</string>
@@ -147,7 +147,7 @@ QString ConfigureInputPlayer::ButtonToText(const Common::ParamPackage& param) {
    // Retrieve the names from Qt
    if (param.Get("engine", "") == "keyboard") {
        const QString button_str = GetKeyName(param.Get("code", 0));
-       return QObject::tr("%1%2").arg(toggle, button_str);
+       return QObject::tr("%1%2%3").arg(toggle, inverted, button_str);
    }

    if (common_button_name == Common::Input::ButtonNames::Invalid) {
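Widening the format string from `%1%2` to `%1%2%3` makes room for a second prefix, so the key label can now show an inverted marker alongside the toggle marker. A sketch with `std::string` in place of `QString`; the marker glyphs are invented for illustration:

#include <string>

std::string ButtonToText(bool is_toggle, bool is_inverted, const std::string& key_name) {
    const std::string toggle = is_toggle ? "~" : "";    // hypothetical toggle marker
    const std::string inverted = is_inverted ? "!" : ""; // hypothetical inverted marker
    // Mirrors tr("%1%2%3").arg(toggle, inverted, button_str).
    return toggle + inverted + key_name;
}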
@@ -341,7 +341,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
        emulated_controller->SetButtonParam(button_id, {});
        button_map[button_id]->setText(tr("[not set]"));
    });
-   if (param.Has("button") || param.Has("hat")) {
+   if (param.Has("code") || param.Has("button") || param.Has("hat")) {
        context_menu.addAction(tr("Toggle button"), [&] {
            const bool toggle_value = !param.Get("toggle", false);
            param.Set("toggle", toggle_value);
@@ -349,8 +349,8 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
            emulated_controller->SetButtonParam(button_id, param);
        });
        context_menu.addAction(tr("Invert button"), [&] {
-           const bool toggle_value = !param.Get("inverted", false);
-           param.Set("inverted", toggle_value);
+           const bool invert_value = !param.Get("inverted", false);
+           param.Set("inverted", invert_value);
            button_map[button_id]->setText(ButtonToText(param));
            emulated_controller->SetButtonParam(button_id, param);
        });
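Both the Toggle and the renamed Invert action follow the same read-negate-write shape: fetch the boolean with a `false` default, flip it, store it back, then refresh the label and push the package to the controller. A compact sketch, with a map standing in for `Common::ParamPackage`:

#include <map>
#include <string>

using ParamPackage = std::map<std::string, bool>;

// Get(key, false) -> negate -> Set(key, ...), as both lambdas do.
void FlipFlag(ParamPackage& param, const std::string& key) {
    const bool current = param.count(key) != 0 && param.at(key);
    param[key] = !current;
}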
@@ -522,28 +522,37 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i

        analog_map_modifier_button[analog_id]->setContextMenuPolicy(Qt::CustomContextMenu);

-       connect(analog_map_modifier_button[analog_id], &QPushButton::customContextMenuRequested,
-               [=, this](const QPoint& menu_location) {
-                   QMenu context_menu;
-                   Common::ParamPackage param = emulated_controller->GetStickParam(analog_id);
-                   context_menu.addAction(tr("Clear"), [&] {
-                       param.Set("modifier", "");
-                       analog_map_modifier_button[analog_id]->setText(tr("[not set]"));
-                       emulated_controller->SetStickParam(analog_id, param);
-                   });
-                   context_menu.addAction(tr("Toggle button"), [&] {
-                       Common::ParamPackage modifier_param =
-                           Common::ParamPackage{param.Get("modifier", "")};
-                       const bool toggle_value = !modifier_param.Get("toggle", false);
-                       modifier_param.Set("toggle", toggle_value);
-                       param.Set("modifier", modifier_param.Serialize());
-                       analog_map_modifier_button[analog_id]->setText(
-                           ButtonToText(modifier_param));
-                       emulated_controller->SetStickParam(analog_id, param);
-                   });
-                   context_menu.exec(
-                       analog_map_modifier_button[analog_id]->mapToGlobal(menu_location));
+       connect(
+           analog_map_modifier_button[analog_id], &QPushButton::customContextMenuRequested,
+           [=, this](const QPoint& menu_location) {
+               QMenu context_menu;
+               Common::ParamPackage param = emulated_controller->GetStickParam(analog_id);
+               context_menu.addAction(tr("Clear"), [&] {
+                   param.Set("modifier", "");
+                   analog_map_modifier_button[analog_id]->setText(tr("[not set]"));
+                   emulated_controller->SetStickParam(analog_id, param);
+               });
+               context_menu.addAction(tr("Toggle button"), [&] {
+                   Common::ParamPackage modifier_param =
+                       Common::ParamPackage{param.Get("modifier", "")};
+                   const bool toggle_value = !modifier_param.Get("toggle", false);
+                   modifier_param.Set("toggle", toggle_value);
+                   param.Set("modifier", modifier_param.Serialize());
+                   analog_map_modifier_button[analog_id]->setText(ButtonToText(modifier_param));
+                   emulated_controller->SetStickParam(analog_id, param);
+               });
+               context_menu.addAction(tr("Invert button"), [&] {
+                   Common::ParamPackage modifier_param =
+                       Common::ParamPackage{param.Get("modifier", "")};
+                   const bool invert_value = !modifier_param.Get("inverted", false);
+                   modifier_param.Set("inverted", invert_value);
+                   param.Set("modifier", modifier_param.Serialize());
+                   analog_map_modifier_button[analog_id]->setText(ButtonToText(modifier_param));
+                   emulated_controller->SetStickParam(analog_id, param);
+               });
+               context_menu.exec(
+                   analog_map_modifier_button[analog_id]->mapToGlobal(menu_location));
            });

        connect(analog_map_range_spinbox[analog_id], qOverload<int>(&QSpinBox::valueChanged),
                [=, this] {
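The modifier lambdas above do one extra hop: the stick parameter stores its modifier as a serialized sub-package, so mutating a single flag means parse, set, re-serialize, store back. A self-contained sketch of that round trip; the `k=v,k=v` encoding is a toy, not `ParamPackage`'s real format:

#include <map>
#include <sstream>
#include <string>

using Package = std::map<std::string, std::string>;

// Toy decoder standing in for Common::ParamPackage{serialized}.
Package Parse(const std::string& s) {
    Package p;
    std::istringstream in{s};
    std::string item;
    while (std::getline(in, item, ',')) {
        const auto eq = item.find('=');
        if (eq != std::string::npos) {
            p[item.substr(0, eq)] = item.substr(eq + 1);
        }
    }
    return p;
}

// Toy encoder standing in for ParamPackage::Serialize().
std::string Serialize(const Package& p) {
    std::string out;
    for (const auto& [key, value] : p) {
        out += (out.empty() ? "" : ",") + key + "=" + value;
    }
    return out;
}

// Parse -> flip one flag -> re-serialize -> store back, as the
// Invert lambda does with param.Get/Set("modifier", ...).
void InvertModifier(Package& stick_param) {
    Package modifier = Parse(stick_param["modifier"]);
    modifier["inverted"] = modifier["inverted"] == "1" ? "0" : "1";
    stick_param["modifier"] = Serialize(modifier);
}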
@@ -190,6 +190,9 @@ void ControllerShortcut::ControllerUpdateEvent(Core::HID::ControllerTriggerType
    if (type != Core::HID::ControllerTriggerType::Button) {
        return;
    }
+   if (!Settings::values.controller_navigation) {
+       return;
+   }
    if (button_sequence.npad.raw == Core::HID::NpadButton::None &&
        button_sequence.capture.raw == 0 && button_sequence.home.raw == 0) {
        return;

@@ -40,6 +40,9 @@ void ControllerNavigation::TriggerButton(Settings::NativeButton::Values native_b

void ControllerNavigation::ControllerUpdateEvent(Core::HID::ControllerTriggerType type) {
    std::lock_guard lock{mutex};
+   if (!Settings::values.controller_navigation) {
+       return;
+   }
    if (type == Core::HID::ControllerTriggerType::Button) {
        ControllerUpdateButton();
        return;
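Both handlers gain the same early-out: when the user turns the setting off, controller events are dropped before any navigation work happens. A sketch of the guard ordering (cheap filters first, work last), with stand-ins for `Settings::values` and the mutex:

#include <mutex>

struct Values {
    bool controller_navigation = true;
} values; // stand-in for Settings::values

std::mutex mutex;

void ControllerUpdateEvent(bool is_button_event) {
    std::lock_guard lock{mutex};
    if (!values.controller_navigation) {
        return; // feature disabled: ignore controller input entirely
    }
    if (is_button_event) {
        // translate button state into UI focus movement...
    }
}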