Compare commits
98 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8603f0f9b1 | ||
|
|
0aa824b12f | ||
|
|
037d9bdde3 | ||
|
|
633ce92908 | ||
|
|
4a84921b31 | ||
|
|
add8b1df68 | ||
|
|
0ea2771889 | ||
|
|
9ae680c639 | ||
|
|
46fdf8c819 | ||
|
|
4a4e87e971 | ||
|
|
e6f652ae12 | ||
|
|
6ce8de4b5f | ||
|
|
45ef421b6b | ||
|
|
fedef7bda3 | ||
|
|
6ee0ba64c8 | ||
|
|
9909d40530 | ||
|
|
160fc63c72 | ||
|
|
78c803b4f3 | ||
|
|
1143923cdd | ||
|
|
d10dffed44 | ||
|
|
e7ac5a6adf | ||
|
|
fbb82e61e3 | ||
|
|
69749a88cd | ||
|
|
d26ee6e01e | ||
|
|
e99a148628 | ||
|
|
c2d4c8b95e | ||
|
|
48a461a629 | ||
|
|
24e2e601d5 | ||
|
|
3b63a46ca4 | ||
|
|
c63a0e88b7 | ||
|
|
1a4d733ec7 | ||
|
|
d9e9e71aec | ||
|
|
4f352833a5 | ||
|
|
d03ae881fd | ||
|
|
960057cba0 | ||
|
|
d41d85766f | ||
|
|
3293877456 | ||
|
|
64e7524f36 | ||
|
|
076c76f4e4 | ||
|
|
ed0bdcc638 | ||
|
|
84ad81ee67 | ||
|
|
63aa08acbe | ||
|
|
3f1b4fb23a | ||
|
|
aaa373585c | ||
|
|
7b574f406b | ||
|
|
65651078e5 | ||
|
|
d2ff93c319 | ||
|
|
ac51d048a9 | ||
|
|
4483089d70 | ||
|
|
d6015ee211 | ||
|
|
81e086b5ac | ||
|
|
75b417489a | ||
|
|
e9b05e86b9 | ||
|
|
8ee78521fa | ||
|
|
c8d6f0cb82 | ||
|
|
9ac176d5a3 | ||
|
|
234f00bdd4 | ||
|
|
5a57b1a09b | ||
|
|
22f105c06d | ||
|
|
10f08ab9ec | ||
|
|
196cc82913 | ||
|
|
f9ee0dc7ee | ||
|
|
221613d4ea | ||
|
|
7526b6fce3 | ||
|
|
ad9dbeb44b | ||
|
|
c161389a0f | ||
|
|
9d9676f620 | ||
|
|
ec6664f6d6 | ||
|
|
42085ff110 | ||
|
|
79f970e6de | ||
|
|
02bc9e9de1 | ||
|
|
cc92c054ec | ||
|
|
4130b07f88 | ||
|
|
40de7f6fe8 | ||
|
|
6c42a23550 | ||
|
|
fad20213e6 | ||
|
|
f59040d752 | ||
|
|
0be8fffc99 | ||
|
|
07e13d6728 | ||
|
|
6ad66acce2 | ||
|
|
b114928459 | ||
|
|
319365fdf0 | ||
|
|
697a4669e1 | ||
|
|
b5f0dc95db | ||
|
|
90febaf717 | ||
|
|
1f6571b3de | ||
|
|
be6bf37224 | ||
|
|
aa30fd75cd | ||
|
|
169d19f7b9 | ||
|
|
71c30a0a89 | ||
|
|
5159f4eee8 | ||
|
|
e85066dac7 | ||
|
|
bb3ab7d66c | ||
|
|
27ddbeb01c | ||
|
|
0ad3c031f4 | ||
|
|
0ccd490fcd | ||
|
|
5ca63d0675 | ||
|
|
48e6f77c03 |
@@ -24,7 +24,7 @@ matrix:
|
||||
- os: osx
|
||||
env: NAME="macos build"
|
||||
sudo: false
|
||||
osx_image: xcode10
|
||||
osx_image: xcode10.1
|
||||
install: "./.travis/macos/deps.sh"
|
||||
script: "./.travis/macos/build.sh"
|
||||
after_success: "./.travis/macos/upload.sh"
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
set -o pipefail
|
||||
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.13
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.14
|
||||
export Qt5_DIR=$(brew --prefix)/opt/qt5
|
||||
export UNICORNDIR=$(pwd)/externals/unicorn
|
||||
export PATH="/usr/local/opt/ccache/libexec:$PATH"
|
||||
|
||||
@@ -73,6 +73,7 @@ set(HASH_FILES
|
||||
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/memory.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/texture.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/other.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#include "common/ring_buffer.h"
|
||||
#include "core/settings.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _WIN32
|
||||
#include <objbase.h>
|
||||
#endif
|
||||
|
||||
@@ -113,7 +113,7 @@ private:
|
||||
|
||||
CubebSink::CubebSink(std::string_view target_device_name) {
|
||||
// Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _WIN32
|
||||
com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
|
||||
#endif
|
||||
|
||||
@@ -152,7 +152,7 @@ CubebSink::~CubebSink() {
|
||||
|
||||
cubeb_destroy(ctx);
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _WIN32
|
||||
if (SUCCEEDED(com_init_result)) {
|
||||
CoUninitialize();
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ private:
|
||||
cubeb_devid output_device{};
|
||||
std::vector<SinkStreamPtr> sink_streams;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _WIN32
|
||||
u32 com_init_result = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -47,6 +47,7 @@ add_custom_command(OUTPUT scm_rev.cpp
|
||||
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/memory.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/texture.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/other.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
|
||||
|
||||
@@ -111,12 +111,6 @@
|
||||
template <std::size_t Position, std::size_t Bits, typename T>
|
||||
struct BitField {
|
||||
private:
|
||||
// We hide the copy assigment operator here, because the default copy
|
||||
// assignment would copy the full storage value, rather than just the bits
|
||||
// relevant to this particular bit field.
|
||||
// We don't delete it because we want BitField to be trivially copyable.
|
||||
constexpr BitField& operator=(const BitField&) = default;
|
||||
|
||||
// UnderlyingType is T for non-enum types and the underlying type of T if
|
||||
// T is an enumeration. Note that T is wrapped within an enable_if in the
|
||||
// former case to workaround compile errors which arise when using
|
||||
@@ -163,9 +157,13 @@ public:
|
||||
BitField(T val) = delete;
|
||||
BitField& operator=(T val) = delete;
|
||||
|
||||
// Force default constructor to be created
|
||||
// so that we can use this within unions
|
||||
constexpr BitField() = default;
|
||||
constexpr BitField() noexcept = default;
|
||||
|
||||
constexpr BitField(const BitField&) noexcept = default;
|
||||
constexpr BitField& operator=(const BitField&) noexcept = default;
|
||||
|
||||
constexpr BitField(BitField&&) noexcept = default;
|
||||
constexpr BitField& operator=(BitField&&) noexcept = default;
|
||||
|
||||
constexpr FORCE_INLINE operator T() const {
|
||||
return Value();
|
||||
|
||||
@@ -217,6 +217,7 @@ add_library(core STATIC
|
||||
hle/service/audio/audren_u.h
|
||||
hle/service/audio/codecctl.cpp
|
||||
hle/service/audio/codecctl.h
|
||||
hle/service/audio/errors.h
|
||||
hle/service/audio/hwopus.cpp
|
||||
hle/service/audio/hwopus.h
|
||||
hle/service/bcat/bcat.cpp
|
||||
|
||||
@@ -36,7 +36,8 @@
|
||||
#include "frontend/applets/software_keyboard.h"
|
||||
#include "frontend/applets/web_browser.h"
|
||||
#include "video_core/debug_utils/debug_utils.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/gpu_asynch.h"
|
||||
#include "video_core/gpu_synch.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
@@ -78,6 +79,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
|
||||
return vfs->OpenFile(path, FileSys::Mode::Read);
|
||||
}
|
||||
struct System::Impl {
|
||||
explicit Impl(System& system) : kernel{system} {}
|
||||
|
||||
Cpu& CurrentCpuCore() {
|
||||
return cpu_core_manager.GetCurrentCore();
|
||||
@@ -95,7 +97,7 @@ struct System::Impl {
|
||||
LOG_DEBUG(HW_Memory, "initialized OK");
|
||||
|
||||
core_timing.Initialize();
|
||||
kernel.Initialize(core_timing);
|
||||
kernel.Initialize();
|
||||
|
||||
const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
|
||||
std::chrono::system_clock::now().time_since_epoch());
|
||||
@@ -128,10 +130,16 @@ struct System::Impl {
|
||||
return ResultStatus::ErrorVideoCore;
|
||||
}
|
||||
|
||||
gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer());
|
||||
is_powered_on = true;
|
||||
|
||||
if (Settings::values.use_asynchronous_gpu_emulation) {
|
||||
gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
|
||||
} else {
|
||||
gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
|
||||
}
|
||||
|
||||
cpu_core_manager.Initialize(system);
|
||||
is_powered_on = true;
|
||||
|
||||
LOG_DEBUG(Core, "Initialized OK");
|
||||
|
||||
// Reset counters and set time origin to current frame
|
||||
@@ -182,13 +190,13 @@ struct System::Impl {
|
||||
|
||||
void Shutdown() {
|
||||
// Log last frame performance stats
|
||||
auto perf_results = GetAndResetPerfStats();
|
||||
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
|
||||
perf_results.emulation_speed * 100.0);
|
||||
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
|
||||
perf_results.game_fps);
|
||||
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
|
||||
perf_results.frametime * 1000.0);
|
||||
const auto perf_results = GetAndResetPerfStats();
|
||||
telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
|
||||
perf_results.emulation_speed * 100.0);
|
||||
telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
|
||||
perf_results.game_fps);
|
||||
telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
|
||||
perf_results.frametime * 1000.0);
|
||||
|
||||
is_powered_on = false;
|
||||
|
||||
@@ -265,7 +273,7 @@ struct System::Impl {
|
||||
Core::FrameLimiter frame_limiter;
|
||||
};
|
||||
|
||||
System::System() : impl{std::make_unique<Impl>()} {}
|
||||
System::System() : impl{std::make_unique<Impl>(*this)} {}
|
||||
System::~System() = default;
|
||||
|
||||
Cpu& System::CurrentCpuCore() {
|
||||
|
||||
@@ -293,10 +293,6 @@ inline ARM_Interface& CurrentArmInterface() {
|
||||
return System::GetInstance().CurrentArmInterface();
|
||||
}
|
||||
|
||||
inline TelemetrySession& Telemetry() {
|
||||
return System::GetInstance().TelemetrySession();
|
||||
}
|
||||
|
||||
inline Kernel::Process* CurrentProcess() {
|
||||
return System::GetInstance().CurrentProcess();
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#endif
|
||||
#include "core/arm/exclusive_monitor.h"
|
||||
#include "core/arm/unicorn/arm_unicorn.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_cpu.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/hle/kernel/scheduler.h"
|
||||
@@ -49,9 +50,9 @@ bool CpuBarrier::Rendezvous() {
|
||||
return false;
|
||||
}
|
||||
|
||||
Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
|
||||
CpuBarrier& cpu_barrier, std::size_t core_index)
|
||||
: cpu_barrier{cpu_barrier}, core_timing{core_timing}, core_index{core_index} {
|
||||
Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
|
||||
std::size_t core_index)
|
||||
: cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
|
||||
if (Settings::values.use_cpu_jit) {
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
|
||||
@@ -63,7 +64,7 @@ Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
|
||||
arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
|
||||
}
|
||||
|
||||
scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface);
|
||||
scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
|
||||
}
|
||||
|
||||
Cpu::~Cpu() = default;
|
||||
|
||||
@@ -15,6 +15,10 @@ namespace Kernel {
|
||||
class Scheduler;
|
||||
}
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Core::Timing {
|
||||
class CoreTiming;
|
||||
}
|
||||
@@ -45,8 +49,8 @@ private:
|
||||
|
||||
class Cpu {
|
||||
public:
|
||||
Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
|
||||
CpuBarrier& cpu_barrier, std::size_t core_index);
|
||||
Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
|
||||
std::size_t core_index);
|
||||
~Cpu();
|
||||
|
||||
void RunLoop(bool tight_loop = true);
|
||||
|
||||
@@ -27,8 +27,7 @@ void CpuCoreManager::Initialize(System& system) {
|
||||
exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
|
||||
|
||||
for (std::size_t index = 0; index < cores.size(); ++index) {
|
||||
cores[index] =
|
||||
std::make_unique<Cpu>(system.CoreTiming(), *exclusive_monitor, *barrier, index);
|
||||
cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
|
||||
}
|
||||
|
||||
// Create threads for CPU cores 1-3, and build thread_to_cpu map
|
||||
|
||||
@@ -4,10 +4,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/swap.h"
|
||||
#include "core/hle/kernel/errors.h"
|
||||
#include "core/memory.h"
|
||||
|
||||
namespace IPC {
|
||||
|
||||
|
||||
@@ -350,7 +350,7 @@ public:
|
||||
template <class T>
|
||||
std::shared_ptr<T> PopIpcInterface() {
|
||||
ASSERT(context->Session()->IsDomain());
|
||||
ASSERT(context->GetDomainMessageHeader()->input_object_count > 0);
|
||||
ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
|
||||
return context->GetDomainRequestHandler<T>(Pop<u32>() - 1);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "common/common_types.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_cpu.h"
|
||||
#include "core/hle/kernel/address_arbiter.h"
|
||||
#include "core/hle/kernel/errors.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/kernel/process.h"
|
||||
@@ -17,32 +18,144 @@
|
||||
#include "core/hle/result.h"
|
||||
#include "core/memory.h"
|
||||
|
||||
namespace Kernel::AddressArbiter {
|
||||
namespace Kernel {
|
||||
namespace {
|
||||
// Wake up num_to_wake (or all) threads in a vector.
|
||||
void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
|
||||
// Only process up to 'target' threads, unless 'target' is <= 0, in which case process
|
||||
// them all.
|
||||
std::size_t last = waiting_threads.size();
|
||||
if (num_to_wake > 0) {
|
||||
last = num_to_wake;
|
||||
}
|
||||
|
||||
// Performs actual address waiting logic.
|
||||
static ResultCode WaitForAddress(VAddr address, s64 timeout) {
|
||||
SharedPtr<Thread> current_thread = GetCurrentThread();
|
||||
// Signal the waiting threads.
|
||||
for (std::size_t i = 0; i < last; i++) {
|
||||
ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
|
||||
waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
|
||||
waiting_threads[i]->SetArbiterWaitAddress(0);
|
||||
waiting_threads[i]->ResumeFromWait();
|
||||
}
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
|
||||
AddressArbiter::~AddressArbiter() = default;
|
||||
|
||||
ResultCode AddressArbiter::SignalToAddress(VAddr address, s32 num_to_wake) {
|
||||
const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
|
||||
WakeThreads(waiting_threads, num_to_wake);
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
|
||||
s32 num_to_wake) {
|
||||
// Ensure that we can write to the address.
|
||||
if (!Memory::IsValidVirtualAddress(address)) {
|
||||
return ERR_INVALID_ADDRESS_STATE;
|
||||
}
|
||||
|
||||
if (static_cast<s32>(Memory::Read32(address)) != value) {
|
||||
return ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
Memory::Write32(address, static_cast<u32>(value + 1));
|
||||
return SignalToAddress(address, num_to_wake);
|
||||
}
|
||||
|
||||
ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
|
||||
s32 num_to_wake) {
|
||||
// Ensure that we can write to the address.
|
||||
if (!Memory::IsValidVirtualAddress(address)) {
|
||||
return ERR_INVALID_ADDRESS_STATE;
|
||||
}
|
||||
|
||||
// Get threads waiting on the address.
|
||||
const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
|
||||
|
||||
// Determine the modified value depending on the waiting count.
|
||||
s32 updated_value;
|
||||
if (waiting_threads.empty()) {
|
||||
updated_value = value - 1;
|
||||
} else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
|
||||
updated_value = value + 1;
|
||||
} else {
|
||||
updated_value = value;
|
||||
}
|
||||
|
||||
if (static_cast<s32>(Memory::Read32(address)) != value) {
|
||||
return ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
Memory::Write32(address, static_cast<u32>(updated_value));
|
||||
WakeThreads(waiting_threads, num_to_wake);
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
|
||||
bool should_decrement) {
|
||||
// Ensure that we can read the address.
|
||||
if (!Memory::IsValidVirtualAddress(address)) {
|
||||
return ERR_INVALID_ADDRESS_STATE;
|
||||
}
|
||||
|
||||
const s32 cur_value = static_cast<s32>(Memory::Read32(address));
|
||||
if (cur_value >= value) {
|
||||
return ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
if (should_decrement) {
|
||||
Memory::Write32(address, static_cast<u32>(cur_value - 1));
|
||||
}
|
||||
|
||||
// Short-circuit without rescheduling, if timeout is zero.
|
||||
if (timeout == 0) {
|
||||
return RESULT_TIMEOUT;
|
||||
}
|
||||
|
||||
return WaitForAddress(address, timeout);
|
||||
}
|
||||
|
||||
ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
|
||||
// Ensure that we can read the address.
|
||||
if (!Memory::IsValidVirtualAddress(address)) {
|
||||
return ERR_INVALID_ADDRESS_STATE;
|
||||
}
|
||||
// Only wait for the address if equal.
|
||||
if (static_cast<s32>(Memory::Read32(address)) != value) {
|
||||
return ERR_INVALID_STATE;
|
||||
}
|
||||
// Short-circuit without rescheduling, if timeout is zero.
|
||||
if (timeout == 0) {
|
||||
return RESULT_TIMEOUT;
|
||||
}
|
||||
|
||||
return WaitForAddress(address, timeout);
|
||||
}
|
||||
|
||||
ResultCode AddressArbiter::WaitForAddress(VAddr address, s64 timeout) {
|
||||
SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
|
||||
current_thread->SetArbiterWaitAddress(address);
|
||||
current_thread->SetStatus(ThreadStatus::WaitArb);
|
||||
current_thread->InvalidateWakeupCallback();
|
||||
|
||||
current_thread->WakeAfterDelay(timeout);
|
||||
|
||||
Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
|
||||
system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
|
||||
return RESULT_TIMEOUT;
|
||||
}
|
||||
|
||||
// Gets the threads waiting on an address.
|
||||
static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
|
||||
const auto RetrieveWaitingThreads = [](std::size_t core_index,
|
||||
std::vector<SharedPtr<Thread>>& waiting_threads,
|
||||
VAddr arb_addr) {
|
||||
const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
|
||||
std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
|
||||
const auto RetrieveWaitingThreads = [this](std::size_t core_index,
|
||||
std::vector<SharedPtr<Thread>>& waiting_threads,
|
||||
VAddr arb_addr) {
|
||||
const auto& scheduler = system.Scheduler(core_index);
|
||||
const auto& thread_list = scheduler.GetThreadList();
|
||||
|
||||
for (const auto& thread : thread_list) {
|
||||
if (thread->GetArbiterWaitAddress() == arb_addr)
|
||||
if (thread->GetArbiterWaitAddress() == arb_addr) {
|
||||
waiting_threads.push_back(thread);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -61,118 +174,4 @@ static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address)
|
||||
|
||||
return threads;
|
||||
}
|
||||
|
||||
// Wake up num_to_wake (or all) threads in a vector.
|
||||
static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
|
||||
// Only process up to 'target' threads, unless 'target' is <= 0, in which case process
|
||||
// them all.
|
||||
std::size_t last = waiting_threads.size();
|
||||
if (num_to_wake > 0)
|
||||
last = num_to_wake;
|
||||
|
||||
// Signal the waiting threads.
|
||||
for (std::size_t i = 0; i < last; i++) {
|
||||
ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
|
||||
waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
|
||||
waiting_threads[i]->SetArbiterWaitAddress(0);
|
||||
waiting_threads[i]->ResumeFromWait();
|
||||
}
|
||||
}
|
||||
|
||||
// Signals an address being waited on.
|
||||
ResultCode SignalToAddress(VAddr address, s32 num_to_wake) {
|
||||
std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
|
||||
|
||||
WakeThreads(waiting_threads, num_to_wake);
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
// Signals an address being waited on and increments its value if equal to the value argument.
|
||||
ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) {
|
||||
// Ensure that we can write to the address.
|
||||
if (!Memory::IsValidVirtualAddress(address)) {
|
||||
return ERR_INVALID_ADDRESS_STATE;
|
||||
}
|
||||
|
||||
if (static_cast<s32>(Memory::Read32(address)) == value) {
|
||||
Memory::Write32(address, static_cast<u32>(value + 1));
|
||||
} else {
|
||||
return ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
return SignalToAddress(address, num_to_wake);
|
||||
}
|
||||
|
||||
// Signals an address being waited on and modifies its value based on waiting thread count if equal
|
||||
// to the value argument.
|
||||
ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
|
||||
s32 num_to_wake) {
|
||||
// Ensure that we can write to the address.
|
||||
if (!Memory::IsValidVirtualAddress(address)) {
|
||||
return ERR_INVALID_ADDRESS_STATE;
|
||||
}
|
||||
|
||||
// Get threads waiting on the address.
|
||||
std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
|
||||
|
||||
// Determine the modified value depending on the waiting count.
|
||||
s32 updated_value;
|
||||
if (waiting_threads.empty()) {
|
||||
updated_value = value - 1;
|
||||
} else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
|
||||
updated_value = value + 1;
|
||||
} else {
|
||||
updated_value = value;
|
||||
}
|
||||
|
||||
if (static_cast<s32>(Memory::Read32(address)) == value) {
|
||||
Memory::Write32(address, static_cast<u32>(updated_value));
|
||||
} else {
|
||||
return ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
WakeThreads(waiting_threads, num_to_wake);
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
// Waits on an address if the value passed is less than the argument value, optionally decrementing.
|
||||
ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) {
|
||||
// Ensure that we can read the address.
|
||||
if (!Memory::IsValidVirtualAddress(address)) {
|
||||
return ERR_INVALID_ADDRESS_STATE;
|
||||
}
|
||||
|
||||
s32 cur_value = static_cast<s32>(Memory::Read32(address));
|
||||
if (cur_value < value) {
|
||||
if (should_decrement) {
|
||||
Memory::Write32(address, static_cast<u32>(cur_value - 1));
|
||||
}
|
||||
} else {
|
||||
return ERR_INVALID_STATE;
|
||||
}
|
||||
// Short-circuit without rescheduling, if timeout is zero.
|
||||
if (timeout == 0) {
|
||||
return RESULT_TIMEOUT;
|
||||
}
|
||||
|
||||
return WaitForAddress(address, timeout);
|
||||
}
|
||||
|
||||
// Waits on an address if the value passed is equal to the argument value.
|
||||
ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
|
||||
// Ensure that we can read the address.
|
||||
if (!Memory::IsValidVirtualAddress(address)) {
|
||||
return ERR_INVALID_ADDRESS_STATE;
|
||||
}
|
||||
// Only wait for the address if equal.
|
||||
if (static_cast<s32>(Memory::Read32(address)) != value) {
|
||||
return ERR_INVALID_STATE;
|
||||
}
|
||||
// Short-circuit without rescheduling, if timeout is zero.
|
||||
if (timeout == 0) {
|
||||
return RESULT_TIMEOUT;
|
||||
}
|
||||
|
||||
return WaitForAddress(address, timeout);
|
||||
}
|
||||
} // namespace Kernel::AddressArbiter
|
||||
} // namespace Kernel
|
||||
|
||||
@@ -5,28 +5,68 @@
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/kernel/address_arbiter.h"
|
||||
|
||||
union ResultCode;
|
||||
|
||||
namespace Kernel::AddressArbiter {
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
enum class ArbitrationType {
|
||||
WaitIfLessThan = 0,
|
||||
DecrementAndWaitIfLessThan = 1,
|
||||
WaitIfEqual = 2,
|
||||
namespace Kernel {
|
||||
|
||||
class Thread;
|
||||
|
||||
class AddressArbiter {
|
||||
public:
|
||||
enum class ArbitrationType {
|
||||
WaitIfLessThan = 0,
|
||||
DecrementAndWaitIfLessThan = 1,
|
||||
WaitIfEqual = 2,
|
||||
};
|
||||
|
||||
enum class SignalType {
|
||||
Signal = 0,
|
||||
IncrementAndSignalIfEqual = 1,
|
||||
ModifyByWaitingCountAndSignalIfEqual = 2,
|
||||
};
|
||||
|
||||
explicit AddressArbiter(Core::System& system);
|
||||
~AddressArbiter();
|
||||
|
||||
AddressArbiter(const AddressArbiter&) = delete;
|
||||
AddressArbiter& operator=(const AddressArbiter&) = delete;
|
||||
|
||||
AddressArbiter(AddressArbiter&&) = default;
|
||||
AddressArbiter& operator=(AddressArbiter&&) = delete;
|
||||
|
||||
/// Signals an address being waited on.
|
||||
ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
|
||||
|
||||
/// Signals an address being waited on and increments its value if equal to the value argument.
|
||||
ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
|
||||
|
||||
/// Signals an address being waited on and modifies its value based on waiting thread count if
|
||||
/// equal to the value argument.
|
||||
ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
|
||||
s32 num_to_wake);
|
||||
|
||||
/// Waits on an address if the value passed is less than the argument value,
|
||||
/// optionally decrementing.
|
||||
ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
|
||||
bool should_decrement);
|
||||
|
||||
/// Waits on an address if the value passed is equal to the argument value.
|
||||
ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
|
||||
|
||||
private:
|
||||
// Waits on the given address with a timeout in nanoseconds
|
||||
ResultCode WaitForAddress(VAddr address, s64 timeout);
|
||||
|
||||
// Gets the threads waiting on an address.
|
||||
std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
|
||||
|
||||
Core::System& system;
|
||||
};
|
||||
|
||||
enum class SignalType {
|
||||
Signal = 0,
|
||||
IncrementAndSignalIfEqual = 1,
|
||||
ModifyByWaitingCountAndSignalIfEqual = 2,
|
||||
};
|
||||
|
||||
ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
|
||||
ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
|
||||
ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
|
||||
|
||||
ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement);
|
||||
ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
|
||||
|
||||
} // namespace Kernel::AddressArbiter
|
||||
} // namespace Kernel
|
||||
|
||||
@@ -17,21 +17,11 @@ ClientSession::~ClientSession() {
|
||||
// This destructor will be called automatically when the last ClientSession handle is closed by
|
||||
// the emulated application.
|
||||
|
||||
// Local references to ServerSession and SessionRequestHandler are necessary to guarantee they
|
||||
// A local reference to the ServerSession is necessary to guarantee it
|
||||
// will be kept alive until after ClientDisconnected() returns.
|
||||
SharedPtr<ServerSession> server = parent->server;
|
||||
if (server) {
|
||||
std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler;
|
||||
if (hle_handler)
|
||||
hle_handler->ClientDisconnected(server);
|
||||
|
||||
// TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
|
||||
// their WaitSynchronization result to 0xC920181A.
|
||||
|
||||
// Clean up the list of client threads with pending requests, they are unneeded now that the
|
||||
// client endpoint is closed.
|
||||
server->pending_requesting_threads.clear();
|
||||
server->currently_handling = nullptr;
|
||||
server->ClientDisconnected();
|
||||
}
|
||||
|
||||
parent->client = nullptr;
|
||||
|
||||
@@ -36,14 +36,15 @@ public:
|
||||
|
||||
ResultCode SendSyncRequest(SharedPtr<Thread> thread);
|
||||
|
||||
std::string name; ///< Name of client port (optional)
|
||||
private:
|
||||
explicit ClientSession(KernelCore& kernel);
|
||||
~ClientSession() override;
|
||||
|
||||
/// The parent session, which links to the server endpoint.
|
||||
std::shared_ptr<Session> parent;
|
||||
|
||||
private:
|
||||
explicit ClientSession(KernelCore& kernel);
|
||||
~ClientSession() override;
|
||||
/// Name of the client session (optional)
|
||||
std::string name;
|
||||
};
|
||||
|
||||
} // namespace Kernel
|
||||
|
||||
@@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default;
|
||||
void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf,
|
||||
bool incoming) {
|
||||
IPC::RequestParser rp(src_cmdbuf);
|
||||
command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>());
|
||||
command_header = rp.PopRaw<IPC::CommandHeader>();
|
||||
|
||||
if (command_header->type == IPC::CommandType::Close) {
|
||||
// Close does not populate the rest of the IPC header
|
||||
@@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
|
||||
|
||||
// If handle descriptor is present, add size of it
|
||||
if (command_header->enable_handle_descriptor) {
|
||||
handle_descriptor_header =
|
||||
std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>());
|
||||
handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>();
|
||||
if (handle_descriptor_header->send_current_pid) {
|
||||
rp.Skip(2, false);
|
||||
}
|
||||
@@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
|
||||
// If this is an incoming message, only CommandType "Request" has a domain header
|
||||
// All outgoing domain messages have the domain header, if only incoming has it
|
||||
if (incoming || domain_message_header) {
|
||||
domain_message_header =
|
||||
std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>());
|
||||
domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>();
|
||||
} else {
|
||||
if (Session()->IsDomain())
|
||||
if (Session()->IsDomain()) {
|
||||
LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
data_payload_header =
|
||||
std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>());
|
||||
data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>();
|
||||
|
||||
data_payload_offset = rp.GetCurrentOffset();
|
||||
|
||||
@@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
|
||||
// Write the domain objects to the command buffer, these go after the raw untranslated data.
|
||||
// TODO(Subv): This completely ignores C buffers.
|
||||
std::size_t domain_offset = size - domain_message_header->num_objects;
|
||||
auto& request_handlers = server_session->domain_request_handlers;
|
||||
|
||||
for (auto& object : domain_objects) {
|
||||
request_handlers.emplace_back(object);
|
||||
dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size());
|
||||
for (const auto& object : domain_objects) {
|
||||
server_session->AppendDomainRequestHandler(object);
|
||||
dst_cmdbuf[domain_offset++] =
|
||||
static_cast<u32_le>(server_session->NumDomainRequestHandlers());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
@@ -15,6 +16,8 @@
|
||||
#include "core/hle/ipc.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
|
||||
union ResultCode;
|
||||
|
||||
namespace Service {
|
||||
class ServiceFrameworkBase;
|
||||
}
|
||||
@@ -166,12 +169,12 @@ public:
|
||||
return buffer_c_desciptors;
|
||||
}
|
||||
|
||||
const IPC::DomainMessageHeader* GetDomainMessageHeader() const {
|
||||
return domain_message_header.get();
|
||||
const IPC::DomainMessageHeader& GetDomainMessageHeader() const {
|
||||
return domain_message_header.value();
|
||||
}
|
||||
|
||||
bool HasDomainMessageHeader() const {
|
||||
return domain_message_header != nullptr;
|
||||
return domain_message_header.has_value();
|
||||
}
|
||||
|
||||
/// Helper function to read a buffer using the appropriate buffer descriptor
|
||||
@@ -208,14 +211,12 @@ public:
|
||||
|
||||
template <typename T>
|
||||
SharedPtr<T> GetCopyObject(std::size_t index) {
|
||||
ASSERT(index < copy_objects.size());
|
||||
return DynamicObjectCast<T>(copy_objects[index]);
|
||||
return DynamicObjectCast<T>(copy_objects.at(index));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
SharedPtr<T> GetMoveObject(std::size_t index) {
|
||||
ASSERT(index < move_objects.size());
|
||||
return DynamicObjectCast<T>(move_objects[index]);
|
||||
return DynamicObjectCast<T>(move_objects.at(index));
|
||||
}
|
||||
|
||||
void AddMoveObject(SharedPtr<Object> object) {
|
||||
@@ -232,7 +233,7 @@ public:
|
||||
|
||||
template <typename T>
|
||||
std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
|
||||
return std::static_pointer_cast<T>(domain_request_handlers[index]);
|
||||
return std::static_pointer_cast<T>(domain_request_handlers.at(index));
|
||||
}
|
||||
|
||||
void SetDomainRequestHandlers(
|
||||
@@ -272,10 +273,10 @@ private:
|
||||
boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
|
||||
boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects;
|
||||
|
||||
std::shared_ptr<IPC::CommandHeader> command_header;
|
||||
std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header;
|
||||
std::shared_ptr<IPC::DataPayloadHeader> data_payload_header;
|
||||
std::shared_ptr<IPC::DomainMessageHeader> domain_message_header;
|
||||
std::optional<IPC::CommandHeader> command_header;
|
||||
std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header;
|
||||
std::optional<IPC::DataPayloadHeader> data_payload_header;
|
||||
std::optional<IPC::DomainMessageHeader> domain_message_header;
|
||||
std::vector<IPC::BufferDescriptorX> buffer_x_desciptors;
|
||||
std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors;
|
||||
std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors;
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/hle/kernel/address_arbiter.h"
|
||||
#include "core/hle/kernel/client_port.h"
|
||||
#include "core/hle/kernel/handle_table.h"
|
||||
#include "core/hle/kernel/kernel.h"
|
||||
@@ -86,11 +87,13 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
|
||||
}
|
||||
|
||||
struct KernelCore::Impl {
|
||||
void Initialize(KernelCore& kernel, Core::Timing::CoreTiming& core_timing) {
|
||||
explicit Impl(Core::System& system) : address_arbiter{system}, system{system} {}
|
||||
|
||||
void Initialize(KernelCore& kernel) {
|
||||
Shutdown();
|
||||
|
||||
InitializeSystemResourceLimit(kernel);
|
||||
InitializeThreads(core_timing);
|
||||
InitializeThreads();
|
||||
}
|
||||
|
||||
void Shutdown() {
|
||||
@@ -122,9 +125,9 @@ struct KernelCore::Impl {
|
||||
ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess());
|
||||
}
|
||||
|
||||
void InitializeThreads(Core::Timing::CoreTiming& core_timing) {
|
||||
void InitializeThreads() {
|
||||
thread_wakeup_event_type =
|
||||
core_timing.RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
|
||||
system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
|
||||
}
|
||||
|
||||
std::atomic<u32> next_object_id{0};
|
||||
@@ -135,6 +138,8 @@ struct KernelCore::Impl {
|
||||
std::vector<SharedPtr<Process>> process_list;
|
||||
Process* current_process = nullptr;
|
||||
|
||||
Kernel::AddressArbiter address_arbiter;
|
||||
|
||||
SharedPtr<ResourceLimit> system_resource_limit;
|
||||
|
||||
Core::Timing::EventType* thread_wakeup_event_type = nullptr;
|
||||
@@ -145,15 +150,18 @@ struct KernelCore::Impl {
|
||||
/// Map of named ports managed by the kernel, which can be retrieved using
|
||||
/// the ConnectToPort SVC.
|
||||
NamedPortTable named_ports;
|
||||
|
||||
// System context
|
||||
Core::System& system;
|
||||
};
|
||||
|
||||
KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {}
|
||||
KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
|
||||
KernelCore::~KernelCore() {
|
||||
Shutdown();
|
||||
}
|
||||
|
||||
void KernelCore::Initialize(Core::Timing::CoreTiming& core_timing) {
|
||||
impl->Initialize(*this, core_timing);
|
||||
void KernelCore::Initialize() {
|
||||
impl->Initialize(*this);
|
||||
}
|
||||
|
||||
void KernelCore::Shutdown() {
|
||||
@@ -184,6 +192,14 @@ const Process* KernelCore::CurrentProcess() const {
|
||||
return impl->current_process;
|
||||
}
|
||||
|
||||
AddressArbiter& KernelCore::AddressArbiter() {
|
||||
return impl->address_arbiter;
|
||||
}
|
||||
|
||||
const AddressArbiter& KernelCore::AddressArbiter() const {
|
||||
return impl->address_arbiter;
|
||||
}
|
||||
|
||||
void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) {
|
||||
impl->named_ports.emplace(std::move(name), std::move(port));
|
||||
}
|
||||
|
||||
@@ -11,6 +11,10 @@
|
||||
template <typename T>
|
||||
class ResultVal;
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Core::Timing {
|
||||
class CoreTiming;
|
||||
struct EventType;
|
||||
@@ -18,6 +22,7 @@ struct EventType;
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
class AddressArbiter;
|
||||
class ClientPort;
|
||||
class HandleTable;
|
||||
class Process;
|
||||
@@ -30,7 +35,14 @@ private:
|
||||
using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>;
|
||||
|
||||
public:
|
||||
KernelCore();
|
||||
/// Constructs an instance of the kernel using the given System
|
||||
/// instance as a context for any necessary system-related state,
|
||||
/// such as threads, CPU core state, etc.
|
||||
///
|
||||
/// @post After execution of the constructor, the provided System
|
||||
/// object *must* outlive the kernel instance itself.
|
||||
///
|
||||
explicit KernelCore(Core::System& system);
|
||||
~KernelCore();
|
||||
|
||||
KernelCore(const KernelCore&) = delete;
|
||||
@@ -40,11 +52,7 @@ public:
|
||||
KernelCore& operator=(KernelCore&&) = delete;
|
||||
|
||||
/// Resets the kernel to a clean slate for use.
|
||||
///
|
||||
/// @param core_timing CoreTiming instance used to create any necessary
|
||||
/// kernel-specific callback events.
|
||||
///
|
||||
void Initialize(Core::Timing::CoreTiming& core_timing);
|
||||
void Initialize();
|
||||
|
||||
/// Clears all resources in use by the kernel instance.
|
||||
void Shutdown();
|
||||
@@ -67,6 +75,12 @@ public:
|
||||
/// Retrieves a const pointer to the current process.
|
||||
const Process* CurrentProcess() const;
|
||||
|
||||
/// Provides a reference to the kernel's address arbiter.
|
||||
Kernel::AddressArbiter& AddressArbiter();
|
||||
|
||||
/// Provides a const reference to the kernel's address arbiter.
|
||||
const Kernel::AddressArbiter& AddressArbiter() const;
|
||||
|
||||
/// Adds a port to the named port table
|
||||
void AddNamedPort(std::string name, SharedPtr<ClientPort> port);
|
||||
|
||||
|
||||
@@ -19,7 +19,8 @@ namespace Kernel {
|
||||
|
||||
std::mutex Scheduler::scheduler_mutex;
|
||||
|
||||
Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {}
|
||||
Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
|
||||
: cpu_core{cpu_core}, system{system} {}
|
||||
|
||||
Scheduler::~Scheduler() {
|
||||
for (auto& thread : thread_list) {
|
||||
@@ -61,7 +62,7 @@ Thread* Scheduler::PopNextReadyThread() {
|
||||
|
||||
void Scheduler::SwitchContext(Thread* new_thread) {
|
||||
Thread* const previous_thread = GetCurrentThread();
|
||||
Process* const previous_process = Core::CurrentProcess();
|
||||
Process* const previous_process = system.Kernel().CurrentProcess();
|
||||
|
||||
UpdateLastContextSwitchTime(previous_thread, previous_process);
|
||||
|
||||
@@ -94,8 +95,8 @@ void Scheduler::SwitchContext(Thread* new_thread) {
|
||||
|
||||
auto* const thread_owner_process = current_thread->GetOwnerProcess();
|
||||
if (previous_process != thread_owner_process) {
|
||||
Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process);
|
||||
SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table);
|
||||
system.Kernel().MakeCurrentProcess(thread_owner_process);
|
||||
SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
|
||||
}
|
||||
|
||||
cpu_core.LoadContext(new_thread->GetContext());
|
||||
@@ -111,7 +112,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
|
||||
|
||||
void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
|
||||
const u64 prev_switch_ticks = last_context_switch_time;
|
||||
const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
|
||||
const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
|
||||
const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
|
||||
|
||||
if (thread != nullptr) {
|
||||
@@ -223,8 +224,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
|
||||
// Take the first non-nullptr one
|
||||
for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
|
||||
const auto res =
|
||||
Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread(
|
||||
core, priority);
|
||||
system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);
|
||||
|
||||
// If scheduler provides a suggested thread
|
||||
if (res != nullptr) {
|
||||
|
||||
@@ -13,7 +13,8 @@
|
||||
|
||||
namespace Core {
|
||||
class ARM_Interface;
|
||||
}
|
||||
class System;
|
||||
} // namespace Core
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
@@ -21,7 +22,7 @@ class Process;
|
||||
|
||||
class Scheduler final {
|
||||
public:
|
||||
explicit Scheduler(Core::ARM_Interface& cpu_core);
|
||||
explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
|
||||
~Scheduler();
|
||||
|
||||
/// Returns whether there are any threads that are ready to run.
|
||||
@@ -162,6 +163,7 @@ private:
|
||||
Core::ARM_Interface& cpu_core;
|
||||
u64 last_context_switch_time = 0;
|
||||
|
||||
Core::System& system;
|
||||
static std::mutex scheduler_mutex;
|
||||
};
|
||||
|
||||
|
||||
@@ -63,42 +63,71 @@ void ServerSession::Acquire(Thread* thread) {
|
||||
pending_requesting_threads.pop_back();
|
||||
}
|
||||
|
||||
ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
|
||||
auto* const domain_message_header = context.GetDomainMessageHeader();
|
||||
if (domain_message_header) {
|
||||
// Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
|
||||
context.SetDomainRequestHandlers(domain_request_handlers);
|
||||
|
||||
// If there is a DomainMessageHeader, then this is CommandType "Request"
|
||||
const u32 object_id{context.GetDomainMessageHeader()->object_id};
|
||||
switch (domain_message_header->command) {
|
||||
case IPC::DomainMessageHeader::CommandType::SendMessage:
|
||||
if (object_id > domain_request_handlers.size()) {
|
||||
LOG_CRITICAL(IPC,
|
||||
"object_id {} is too big! This probably means a recent service call "
|
||||
"to {} needed to return a new interface!",
|
||||
object_id, name);
|
||||
UNREACHABLE();
|
||||
return RESULT_SUCCESS; // Ignore error if asserts are off
|
||||
}
|
||||
return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
|
||||
|
||||
case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
|
||||
LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
|
||||
|
||||
domain_request_handlers[object_id - 1] = nullptr;
|
||||
|
||||
IPC::ResponseBuilder rb{context, 2};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_CRITICAL(IPC, "Unknown domain command={}",
|
||||
static_cast<int>(domain_message_header->command.Value()));
|
||||
ASSERT(false);
|
||||
void ServerSession::ClientDisconnected() {
|
||||
// We keep a shared pointer to the hle handler to keep it alive throughout
|
||||
// the call to ClientDisconnected, as ClientDisconnected invalidates the
|
||||
// hle_handler member itself during the course of the function executing.
|
||||
std::shared_ptr<SessionRequestHandler> handler = hle_handler;
|
||||
if (handler) {
|
||||
// Note that after this returns, this server session's hle_handler is
|
||||
// invalidated (set to null).
|
||||
handler->ClientDisconnected(this);
|
||||
}
|
||||
|
||||
// TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
|
||||
// their WaitSynchronization result to 0xC920181A.
|
||||
|
||||
// Clean up the list of client threads with pending requests, they are unneeded now that the
|
||||
// client endpoint is closed.
|
||||
pending_requesting_threads.clear();
|
||||
currently_handling = nullptr;
|
||||
}
|
||||
|
||||
void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) {
|
||||
domain_request_handlers.push_back(std::move(handler));
|
||||
}
|
||||
|
||||
std::size_t ServerSession::NumDomainRequestHandlers() const {
|
||||
return domain_request_handlers.size();
|
||||
}
|
||||
|
||||
ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
|
||||
if (!context.HasDomainMessageHeader()) {
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
// Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
|
||||
context.SetDomainRequestHandlers(domain_request_handlers);
|
||||
|
||||
// If there is a DomainMessageHeader, then this is CommandType "Request"
|
||||
const auto& domain_message_header = context.GetDomainMessageHeader();
|
||||
const u32 object_id{domain_message_header.object_id};
|
||||
switch (domain_message_header.command) {
|
||||
case IPC::DomainMessageHeader::CommandType::SendMessage:
|
||||
if (object_id > domain_request_handlers.size()) {
|
||||
LOG_CRITICAL(IPC,
|
||||
"object_id {} is too big! This probably means a recent service call "
|
||||
"to {} needed to return a new interface!",
|
||||
object_id, name);
|
||||
UNREACHABLE();
|
||||
return RESULT_SUCCESS; // Ignore error if asserts are off
|
||||
}
|
||||
return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
|
||||
|
||||
case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
|
||||
LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
|
||||
|
||||
domain_request_handlers[object_id - 1] = nullptr;
|
||||
|
||||
IPC::ResponseBuilder rb{context, 2};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_CRITICAL(IPC, "Unknown domain command={}",
|
||||
static_cast<int>(domain_message_header.command.Value()));
|
||||
ASSERT(false);
|
||||
return RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -46,6 +46,14 @@ public:
|
||||
return HANDLE_TYPE;
|
||||
}
|
||||
|
||||
Session* GetParent() {
|
||||
return parent.get();
|
||||
}
|
||||
|
||||
const Session* GetParent() const {
|
||||
return parent.get();
|
||||
}
|
||||
|
||||
using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
|
||||
|
||||
/**
|
||||
@@ -78,23 +86,16 @@ public:
|
||||
|
||||
void Acquire(Thread* thread) override;
|
||||
|
||||
std::string name; ///< The name of this session (optional)
|
||||
std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint.
|
||||
std::shared_ptr<SessionRequestHandler>
|
||||
hle_handler; ///< This session's HLE request handler (applicable when not a domain)
|
||||
/// Called when a client disconnection occurs.
|
||||
void ClientDisconnected();
|
||||
|
||||
/// This is the list of domain request handlers (after conversion to a domain)
|
||||
std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
|
||||
/// Adds a new domain request handler to the collection of request handlers within
|
||||
/// this ServerSession instance.
|
||||
void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler);
|
||||
|
||||
/// List of threads that are pending a response after a sync request. This list is processed in
|
||||
/// a LIFO manner, thus, the last request will be dispatched first.
|
||||
/// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
|
||||
std::vector<SharedPtr<Thread>> pending_requesting_threads;
|
||||
|
||||
/// Thread whose request is currently being handled. A request is considered "handled" when a
|
||||
/// response is sent via svcReplyAndReceive.
|
||||
/// TODO(Subv): Find a better name for this.
|
||||
SharedPtr<Thread> currently_handling;
|
||||
/// Retrieves the total number of domain request handlers that have been
|
||||
/// appended to this ServerSession instance.
|
||||
std::size_t NumDomainRequestHandlers() const;
|
||||
|
||||
/// Returns true if the session has been converted to a domain, otherwise False
|
||||
bool IsDomain() const {
|
||||
@@ -129,8 +130,30 @@ private:
|
||||
/// object handle.
|
||||
ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context);
|
||||
|
||||
/// The parent session, which links to the client endpoint.
|
||||
std::shared_ptr<Session> parent;
|
||||
|
||||
/// This session's HLE request handler (applicable when not a domain)
|
||||
std::shared_ptr<SessionRequestHandler> hle_handler;
|
||||
|
||||
/// This is the list of domain request handlers (after conversion to a domain)
|
||||
std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
|
||||
|
||||
/// List of threads that are pending a response after a sync request. This list is processed in
|
||||
/// a LIFO manner, thus, the last request will be dispatched first.
|
||||
/// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
|
||||
std::vector<SharedPtr<Thread>> pending_requesting_threads;
|
||||
|
||||
/// Thread whose request is currently being handled. A request is considered "handled" when a
|
||||
/// response is sent via svcReplyAndReceive.
|
||||
/// TODO(Subv): Find a better name for this.
|
||||
SharedPtr<Thread> currently_handling;
|
||||
|
||||
/// When set to True, converts the session to a domain at the end of the command
|
||||
bool convert_to_domain{};
|
||||
|
||||
/// The name of this session (optional)
|
||||
std::string name;
|
||||
};
|
||||
|
||||
} // namespace Kernel
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "core/hle/kernel/errors.h"
|
||||
#include "core/hle/kernel/kernel.h"
|
||||
#include "core/hle/kernel/shared_memory.h"
|
||||
@@ -34,8 +33,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
|
||||
shared_memory->backing_block_offset = 0;
|
||||
|
||||
// Refresh the address mappings for the current process.
|
||||
if (Core::CurrentProcess() != nullptr) {
|
||||
Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
|
||||
if (kernel.CurrentProcess() != nullptr) {
|
||||
kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
|
||||
shared_memory->backing_block.get());
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "core/hle/kernel/address_arbiter.h"
|
||||
#include "core/hle/kernel/client_port.h"
|
||||
#include "core/hle/kernel/client_session.h"
|
||||
#include "core/hle/kernel/errors.h"
|
||||
#include "core/hle/kernel/handle_table.h"
|
||||
#include "core/hle/kernel/kernel.h"
|
||||
#include "core/hle/kernel/mutex.h"
|
||||
@@ -47,23 +48,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
|
||||
return address + size > address;
|
||||
}
|
||||
|
||||
// Checks if a given address range lies within a larger address range.
|
||||
constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
|
||||
VAddr address_range_end) {
|
||||
const VAddr end_address = address + size - 1;
|
||||
return address_range_begin <= address && end_address <= address_range_end - 1;
|
||||
}
|
||||
|
||||
bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
|
||||
return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
|
||||
vm.GetAddressSpaceEndAddress());
|
||||
}
|
||||
|
||||
bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
|
||||
return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
|
||||
vm.GetNewMapRegionEndAddress());
|
||||
}
|
||||
|
||||
// 8 GiB
|
||||
constexpr u64 MAIN_MEMORY_SIZE = 0x200000000;
|
||||
|
||||
@@ -105,14 +89,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
|
||||
return ERR_INVALID_ADDRESS_STATE;
|
||||
}
|
||||
|
||||
if (!IsInsideAddressSpace(vm_manager, src_addr, size)) {
|
||||
if (!vm_manager.IsWithinAddressSpace(src_addr, size)) {
|
||||
LOG_ERROR(Kernel_SVC,
|
||||
"Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
|
||||
src_addr, size);
|
||||
return ERR_INVALID_ADDRESS_STATE;
|
||||
}
|
||||
|
||||
if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) {
|
||||
if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) {
|
||||
LOG_ERROR(Kernel_SVC,
|
||||
"Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}",
|
||||
dst_addr, size);
|
||||
@@ -238,7 +222,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
|
||||
auto* const current_process = Core::CurrentProcess();
|
||||
auto& vm_manager = current_process->VMManager();
|
||||
|
||||
if (!IsInsideAddressSpace(vm_manager, addr, size)) {
|
||||
if (!vm_manager.IsWithinAddressSpace(addr, size)) {
|
||||
LOG_ERROR(Kernel_SVC,
|
||||
"Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
|
||||
size);
|
||||
@@ -299,7 +283,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
|
||||
}
|
||||
|
||||
auto& vm_manager = Core::CurrentProcess()->VMManager();
|
||||
if (!IsInsideAddressSpace(vm_manager, address, size)) {
|
||||
if (!vm_manager.IsWithinAddressSpace(address, size)) {
|
||||
LOG_ERROR(Kernel_SVC,
|
||||
"Given address (0x{:016X}) is outside the bounds of the address space.", address);
|
||||
return ERR_INVALID_ADDRESS_STATE;
|
||||
@@ -1495,13 +1479,14 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout
|
||||
return ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
auto& address_arbiter = Core::System::GetInstance().Kernel().AddressArbiter();
|
||||
switch (static_cast<AddressArbiter::ArbitrationType>(type)) {
|
||||
case AddressArbiter::ArbitrationType::WaitIfLessThan:
|
||||
return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false);
|
||||
return address_arbiter.WaitForAddressIfLessThan(address, value, timeout, false);
|
||||
case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan:
|
||||
return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
|
||||
return address_arbiter.WaitForAddressIfLessThan(address, value, timeout, true);
|
||||
case AddressArbiter::ArbitrationType::WaitIfEqual:
|
||||
return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
|
||||
return address_arbiter.WaitForAddressIfEqual(address, value, timeout);
|
||||
default:
|
||||
LOG_ERROR(Kernel_SVC,
|
||||
"Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan "
|
||||
@@ -1526,13 +1511,14 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
|
||||
return ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
auto& address_arbiter = Core::System::GetInstance().Kernel().AddressArbiter();
|
||||
switch (static_cast<AddressArbiter::SignalType>(type)) {
|
||||
case AddressArbiter::SignalType::Signal:
|
||||
return AddressArbiter::SignalToAddress(address, num_to_wake);
|
||||
return address_arbiter.SignalToAddress(address, num_to_wake);
|
||||
case AddressArbiter::SignalType::IncrementAndSignalIfEqual:
|
||||
return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
|
||||
return address_arbiter.IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
|
||||
case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
|
||||
return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
|
||||
return address_arbiter.ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
|
||||
num_to_wake);
|
||||
default:
|
||||
LOG_ERROR(Kernel_SVC,
|
||||
|
||||
@@ -184,8 +184,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
|
||||
return ERR_INVALID_PROCESSOR_ID;
|
||||
}
|
||||
|
||||
// TODO(yuriks): Other checks, returning 0xD9001BEA
|
||||
|
||||
if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) {
|
||||
LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
|
||||
// TODO (bunnei): Find the correct error code to use here
|
||||
|
||||
@@ -17,8 +17,8 @@
|
||||
#include "core/memory_setup.h"
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
static const char* GetMemoryStateName(MemoryState state) {
|
||||
namespace {
|
||||
const char* GetMemoryStateName(MemoryState state) {
|
||||
static constexpr const char* names[] = {
|
||||
"Unmapped", "Io",
|
||||
"Normal", "CodeStatic",
|
||||
@@ -35,6 +35,14 @@ static const char* GetMemoryStateName(MemoryState state) {
|
||||
return names[ToSvcMemoryState(state)];
|
||||
}
|
||||
|
||||
// Checks if a given address range lies within a larger address range.
|
||||
constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
|
||||
VAddr address_range_end) {
|
||||
const VAddr end_address = address + size - 1;
|
||||
return address_range_begin <= address && end_address <= address_range_end - 1;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
|
||||
ASSERT(base + size == next.base);
|
||||
if (permissions != next.permissions || state != next.state || attribute != next.attribute ||
|
||||
@@ -249,8 +257,7 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
|
||||
}
|
||||
|
||||
ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {
|
||||
if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
|
||||
target + size < target) {
|
||||
if (!IsWithinHeapRegion(target, size)) {
|
||||
return ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
@@ -285,8 +292,7 @@ ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission p
|
||||
}
|
||||
|
||||
ResultCode VMManager::HeapFree(VAddr target, u64 size) {
|
||||
if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
|
||||
target + size < target) {
|
||||
if (!IsWithinHeapRegion(target, size)) {
|
||||
return ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
@@ -706,6 +712,11 @@ u64 VMManager::GetAddressSpaceWidth() const {
|
||||
return address_space_width;
|
||||
}
|
||||
|
||||
bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const {
|
||||
return IsInsideAddressRange(address, size, GetAddressSpaceBaseAddress(),
|
||||
GetAddressSpaceEndAddress());
|
||||
}
|
||||
|
||||
VAddr VMManager::GetASLRRegionBaseAddress() const {
|
||||
return aslr_region_base;
|
||||
}
|
||||
@@ -750,6 +761,11 @@ u64 VMManager::GetCodeRegionSize() const {
|
||||
return code_region_end - code_region_base;
|
||||
}
|
||||
|
||||
bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const {
|
||||
return IsInsideAddressRange(address, size, GetCodeRegionBaseAddress(),
|
||||
GetCodeRegionEndAddress());
|
||||
}
|
||||
|
||||
VAddr VMManager::GetHeapRegionBaseAddress() const {
|
||||
return heap_region_base;
|
||||
}
|
||||
@@ -762,6 +778,11 @@ u64 VMManager::GetHeapRegionSize() const {
|
||||
return heap_region_end - heap_region_base;
|
||||
}
|
||||
|
||||
bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
|
||||
return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(),
|
||||
GetHeapRegionEndAddress());
|
||||
}
|
||||
|
||||
VAddr VMManager::GetMapRegionBaseAddress() const {
|
||||
return map_region_base;
|
||||
}
|
||||
@@ -774,6 +795,10 @@ u64 VMManager::GetMapRegionSize() const {
|
||||
return map_region_end - map_region_base;
|
||||
}
|
||||
|
||||
bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const {
|
||||
return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress());
|
||||
}
|
||||
|
||||
VAddr VMManager::GetNewMapRegionBaseAddress() const {
|
||||
return new_map_region_base;
|
||||
}
|
||||
@@ -786,6 +811,11 @@ u64 VMManager::GetNewMapRegionSize() const {
|
||||
return new_map_region_end - new_map_region_base;
|
||||
}
|
||||
|
||||
bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const {
|
||||
return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(),
|
||||
GetNewMapRegionEndAddress());
|
||||
}
|
||||
|
||||
VAddr VMManager::GetTLSIORegionBaseAddress() const {
|
||||
return tls_io_region_base;
|
||||
}
|
||||
@@ -798,4 +828,9 @@ u64 VMManager::GetTLSIORegionSize() const {
|
||||
return tls_io_region_end - tls_io_region_base;
|
||||
}
|
||||
|
||||
bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const {
|
||||
return IsInsideAddressRange(address, size, GetTLSIORegionBaseAddress(),
|
||||
GetTLSIORegionEndAddress());
|
||||
}
|
||||
|
||||
} // namespace Kernel
|
||||
|
||||
@@ -432,18 +432,21 @@ public:
|
||||
/// Gets the address space width in bits.
|
||||
u64 GetAddressSpaceWidth() const;
|
||||
|
||||
/// Determines whether or not the given address range lies within the address space.
|
||||
bool IsWithinAddressSpace(VAddr address, u64 size) const;
|
||||
|
||||
/// Gets the base address of the ASLR region.
|
||||
VAddr GetASLRRegionBaseAddress() const;
|
||||
|
||||
/// Gets the end address of the ASLR region.
|
||||
VAddr GetASLRRegionEndAddress() const;
|
||||
|
||||
/// Determines whether or not the specified address range is within the ASLR region.
|
||||
bool IsWithinASLRRegion(VAddr address, u64 size) const;
|
||||
|
||||
/// Gets the size of the ASLR region
|
||||
u64 GetASLRRegionSize() const;
|
||||
|
||||
/// Determines whether or not the specified address range is within the ASLR region.
|
||||
bool IsWithinASLRRegion(VAddr address, u64 size) const;
|
||||
|
||||
/// Gets the base address of the code region.
|
||||
VAddr GetCodeRegionBaseAddress() const;
|
||||
|
||||
@@ -453,6 +456,9 @@ public:
|
||||
/// Gets the total size of the code region in bytes.
|
||||
u64 GetCodeRegionSize() const;
|
||||
|
||||
/// Determines whether or not the specified range is within the code region.
|
||||
bool IsWithinCodeRegion(VAddr address, u64 size) const;
|
||||
|
||||
/// Gets the base address of the heap region.
|
||||
VAddr GetHeapRegionBaseAddress() const;
|
||||
|
||||
@@ -462,6 +468,9 @@ public:
|
||||
/// Gets the total size of the heap region in bytes.
|
||||
u64 GetHeapRegionSize() const;
|
||||
|
||||
/// Determines whether or not the specified range is within the heap region.
|
||||
bool IsWithinHeapRegion(VAddr address, u64 size) const;
|
||||
|
||||
/// Gets the base address of the map region.
|
||||
VAddr GetMapRegionBaseAddress() const;
|
||||
|
||||
@@ -471,6 +480,9 @@ public:
|
||||
/// Gets the total size of the map region in bytes.
|
||||
u64 GetMapRegionSize() const;
|
||||
|
||||
/// Determines whether or not the specified range is within the map region.
|
||||
bool IsWithinMapRegion(VAddr address, u64 size) const;
|
||||
|
||||
/// Gets the base address of the new map region.
|
||||
VAddr GetNewMapRegionBaseAddress() const;
|
||||
|
||||
@@ -480,6 +492,9 @@ public:
|
||||
/// Gets the total size of the new map region in bytes.
|
||||
u64 GetNewMapRegionSize() const;
|
||||
|
||||
/// Determines whether or not the given address range is within the new map region
|
||||
bool IsWithinNewMapRegion(VAddr address, u64 size) const;
|
||||
|
||||
/// Gets the base address of the TLS IO region.
|
||||
VAddr GetTLSIORegionBaseAddress() const;
|
||||
|
||||
@@ -489,6 +504,9 @@ public:
|
||||
/// Gets the total size of the TLS IO region in bytes.
|
||||
u64 GetTLSIORegionSize() const;
|
||||
|
||||
/// Determines if the given address range is within the TLS IO region.
|
||||
bool IsWithinTLSIORegion(VAddr address, u64 size) const;
|
||||
|
||||
/// Each VMManager has its own page table, which is set as the main one when the owning process
|
||||
/// is scheduled.
|
||||
Memory::PageTable page_table;
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
#include <utility>
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include "common/string_util.h"
|
||||
#include "core/core.h"
|
||||
#include "core/frontend/applets/software_keyboard.h"
|
||||
#include "core/hle/result.h"
|
||||
#include "core/hle/service/am/am.h"
|
||||
#include "core/hle/service/am/applets/software_keyboard.h"
|
||||
|
||||
|
||||
@@ -9,10 +9,13 @@
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/swap.h"
|
||||
#include "core/hle/service/am/am.h"
|
||||
#include "core/hle/service/am/applets/applets.h"
|
||||
|
||||
union ResultCode;
|
||||
|
||||
namespace Service::AM::Applets {
|
||||
|
||||
enum class KeysetDisable : u32 {
|
||||
|
||||
@@ -18,17 +18,11 @@
|
||||
#include "core/hle/kernel/readable_event.h"
|
||||
#include "core/hle/kernel/writable_event.h"
|
||||
#include "core/hle/service/audio/audout_u.h"
|
||||
#include "core/hle/service/audio/errors.h"
|
||||
#include "core/memory.h"
|
||||
|
||||
namespace Service::Audio {
|
||||
|
||||
namespace ErrCodes {
|
||||
enum {
|
||||
ErrorUnknown = 2,
|
||||
BufferCountExceeded = 8,
|
||||
};
|
||||
}
|
||||
|
||||
constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}};
|
||||
constexpr int DefaultSampleRate{48000};
|
||||
|
||||
@@ -100,7 +94,7 @@ private:
|
||||
|
||||
if (stream->IsPlaying()) {
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown));
|
||||
rb.Push(ERR_OPERATION_FAILED);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -113,7 +107,9 @@ private:
|
||||
void StopAudioOut(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Service_Audio, "called");
|
||||
|
||||
audio_core.StopStream(stream);
|
||||
if (stream->IsPlaying()) {
|
||||
audio_core.StopStream(stream);
|
||||
}
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
@@ -143,7 +139,8 @@ private:
|
||||
|
||||
if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded));
|
||||
rb.Push(ERR_BUFFER_COUNT_EXCEEDED);
|
||||
return;
|
||||
}
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "core/hle/kernel/readable_event.h"
|
||||
#include "core/hle/kernel/writable_event.h"
|
||||
#include "core/hle/service/audio/audren_u.h"
|
||||
#include "core/hle/service/audio/errors.h"
|
||||
|
||||
namespace Service::Audio {
|
||||
|
||||
@@ -146,7 +147,7 @@ private:
|
||||
// code in this case.
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
rb.Push(ResultCode{ErrorModule::Audio, 201});
|
||||
rb.Push(ERR_NOT_SUPPORTED);
|
||||
}
|
||||
|
||||
Kernel::EventPair system_event;
|
||||
|
||||
15
src/core/hle/service/audio/errors.h
Normal file
15
src/core/hle/service/audio/errors.h
Normal file
@@ -0,0 +1,15 @@
|
||||
// Copyright 2019 yuzu emulator team
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "core/hle/result.h"
|
||||
|
||||
namespace Service::Audio {
|
||||
|
||||
constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2};
|
||||
constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8};
|
||||
constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513};
|
||||
|
||||
} // namespace Service::Audio
|
||||
@@ -9,43 +9,32 @@
|
||||
|
||||
#include <opus.h>
|
||||
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/hle/ipc_helpers.h"
|
||||
#include "core/hle/kernel/hle_ipc.h"
|
||||
#include "core/hle/service/audio/hwopus.h"
|
||||
|
||||
namespace Service::Audio {
|
||||
|
||||
namespace {
|
||||
struct OpusDeleter {
|
||||
void operator()(void* ptr) const {
|
||||
operator delete(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
|
||||
using OpusDecoderPtr = std::unique_ptr<OpusDecoder, OpusDeleter>;
|
||||
|
||||
struct OpusPacketHeader {
|
||||
// Packet size in bytes.
|
||||
u32_be size;
|
||||
// Indicates the final range of the codec's entropy coder.
|
||||
u32_be final_range;
|
||||
};
|
||||
static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusHeader is an invalid size");
|
||||
|
||||
class OpusDecoderStateBase {
|
||||
public:
|
||||
IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoder, OpusDeleter> decoder, u32 sample_rate,
|
||||
u32 channel_count)
|
||||
: ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)),
|
||||
sample_rate(sample_rate), channel_count(channel_count) {
|
||||
// clang-format off
|
||||
static const FunctionInfo functions[] = {
|
||||
{0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
|
||||
{1, nullptr, "SetContext"},
|
||||
{2, nullptr, "DecodeInterleavedForMultiStreamOld"},
|
||||
{3, nullptr, "SetContextForMultiStream"},
|
||||
{4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
|
||||
{5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
|
||||
{6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
|
||||
{7, nullptr, "DecodeInterleavedForMultiStream"},
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
RegisterHandlers(functions);
|
||||
}
|
||||
|
||||
private:
|
||||
/// Describes extra behavior that may be asked of the decoding context.
|
||||
enum class ExtraBehavior {
|
||||
/// No extra behavior.
|
||||
@@ -55,30 +44,36 @@ private:
|
||||
ResetContext,
|
||||
};
|
||||
|
||||
void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called");
|
||||
enum class PerfTime {
|
||||
Disabled,
|
||||
Enabled,
|
||||
};
|
||||
|
||||
DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None);
|
||||
}
|
||||
|
||||
void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called");
|
||||
|
||||
u64 performance = 0;
|
||||
DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None);
|
||||
}
|
||||
|
||||
void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called");
|
||||
|
||||
IPC::RequestParser rp{ctx};
|
||||
const auto extra_behavior =
|
||||
rp.Pop<bool>() ? ExtraBehavior::ResetContext : ExtraBehavior::None;
|
||||
|
||||
u64 performance = 0;
|
||||
DecodeInterleavedHelper(ctx, &performance, extra_behavior);
|
||||
virtual ~OpusDecoderStateBase() = default;
|
||||
|
||||
// Decodes interleaved Opus packets. Optionally allows reporting time taken to
|
||||
// perform the decoding, as well as any relevant extra behavior.
|
||||
virtual void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
|
||||
ExtraBehavior extra_behavior) = 0;
|
||||
};
|
||||
|
||||
// Represents the decoder state for a non-multistream decoder.
|
||||
class OpusDecoderState final : public OpusDecoderStateBase {
|
||||
public:
|
||||
explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count)
|
||||
: decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {}
|
||||
|
||||
void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
|
||||
ExtraBehavior extra_behavior) override {
|
||||
if (perf_time == PerfTime::Disabled) {
|
||||
DecodeInterleavedHelper(ctx, nullptr, extra_behavior);
|
||||
} else {
|
||||
u64 performance = 0;
|
||||
DecodeInterleavedHelper(ctx, &performance, extra_behavior);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance,
|
||||
ExtraBehavior extra_behavior) {
|
||||
u32 consumed = 0;
|
||||
@@ -89,8 +84,7 @@ private:
|
||||
ResetDecoderContext();
|
||||
}
|
||||
|
||||
if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples,
|
||||
performance)) {
|
||||
if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
|
||||
LOG_ERROR(Audio, "Failed to decode opus data");
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
// TODO(ogniK): Use correct error code
|
||||
@@ -109,27 +103,27 @@ private:
|
||||
ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
|
||||
}
|
||||
|
||||
bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input,
|
||||
std::vector<opus_int16>& output, u64* out_performance_time) {
|
||||
bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input,
|
||||
std::vector<opus_int16>& output, u64* out_performance_time) const {
|
||||
const auto start_time = std::chrono::high_resolution_clock::now();
|
||||
const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
|
||||
if (sizeof(OpusHeader) > input.size()) {
|
||||
if (sizeof(OpusPacketHeader) > input.size()) {
|
||||
LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
|
||||
sizeof(OpusHeader), input.size());
|
||||
sizeof(OpusPacketHeader), input.size());
|
||||
return false;
|
||||
}
|
||||
|
||||
OpusHeader hdr{};
|
||||
std::memcpy(&hdr, input.data(), sizeof(OpusHeader));
|
||||
if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) {
|
||||
OpusPacketHeader hdr{};
|
||||
std::memcpy(&hdr, input.data(), sizeof(OpusPacketHeader));
|
||||
if (sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size) > input.size()) {
|
||||
LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}",
|
||||
sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size());
|
||||
sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size), input.size());
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto frame = input.data() + sizeof(OpusHeader);
|
||||
const auto frame = input.data() + sizeof(OpusPacketHeader);
|
||||
const auto decoded_sample_count = opus_packet_get_nb_samples(
|
||||
frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)),
|
||||
frame, static_cast<opus_int32>(input.size() - sizeof(OpusPacketHeader)),
|
||||
static_cast<opus_int32>(sample_rate));
|
||||
if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) {
|
||||
LOG_ERROR(
|
||||
@@ -141,18 +135,18 @@ private:
|
||||
|
||||
const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
|
||||
const auto out_sample_count =
|
||||
opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0);
|
||||
opus_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
|
||||
if (out_sample_count < 0) {
|
||||
LOG_ERROR(Audio,
|
||||
"Incorrect sample count received from opus_decode, "
|
||||
"output_sample_count={}, frame_size={}, data_sz_from_hdr={}",
|
||||
out_sample_count, frame_size, static_cast<u32>(hdr.sz));
|
||||
out_sample_count, frame_size, static_cast<u32>(hdr.size));
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
|
||||
sample_count = out_sample_count;
|
||||
consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz);
|
||||
consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size);
|
||||
if (out_performance_time != nullptr) {
|
||||
*out_performance_time =
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
|
||||
@@ -167,21 +161,66 @@ private:
|
||||
opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
|
||||
}
|
||||
|
||||
struct OpusHeader {
|
||||
u32_be sz; // Needs to be BE for some odd reason
|
||||
INSERT_PADDING_WORDS(1);
|
||||
};
|
||||
static_assert(sizeof(OpusHeader) == 0x8, "OpusHeader is an invalid size");
|
||||
|
||||
std::unique_ptr<OpusDecoder, OpusDeleter> decoder;
|
||||
OpusDecoderPtr decoder;
|
||||
u32 sample_rate;
|
||||
u32 channel_count;
|
||||
};
|
||||
|
||||
static std::size_t WorkerBufferSize(u32 channel_count) {
|
||||
class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
|
||||
public:
|
||||
explicit IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoderStateBase> decoder_state)
|
||||
: ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} {
|
||||
// clang-format off
|
||||
static const FunctionInfo functions[] = {
|
||||
{0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
|
||||
{1, nullptr, "SetContext"},
|
||||
{2, nullptr, "DecodeInterleavedForMultiStreamOld"},
|
||||
{3, nullptr, "SetContextForMultiStream"},
|
||||
{4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
|
||||
{5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
|
||||
{6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
|
||||
{7, nullptr, "DecodeInterleavedForMultiStream"},
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
RegisterHandlers(functions);
|
||||
}
|
||||
|
||||
private:
|
||||
void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called");
|
||||
|
||||
decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Disabled,
|
||||
OpusDecoderStateBase::ExtraBehavior::None);
|
||||
}
|
||||
|
||||
void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called");
|
||||
|
||||
decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
|
||||
OpusDecoderStateBase::ExtraBehavior::None);
|
||||
}
|
||||
|
||||
void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called");
|
||||
|
||||
IPC::RequestParser rp{ctx};
|
||||
const auto extra_behavior = rp.Pop<bool>()
|
||||
? OpusDecoderStateBase::ExtraBehavior::ResetContext
|
||||
: OpusDecoderStateBase::ExtraBehavior::None;
|
||||
|
||||
decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
|
||||
extra_behavior);
|
||||
}
|
||||
|
||||
std::unique_ptr<OpusDecoderStateBase> decoder_state;
|
||||
};
|
||||
|
||||
std::size_t WorkerBufferSize(u32 channel_count) {
|
||||
ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
|
||||
return opus_decoder_get_size(static_cast<int>(channel_count));
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp{ctx};
|
||||
@@ -220,8 +259,7 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
|
||||
const std::size_t worker_sz = WorkerBufferSize(channel_count);
|
||||
ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
|
||||
|
||||
std::unique_ptr<OpusDecoder, OpusDeleter> decoder{
|
||||
static_cast<OpusDecoder*>(operator new(worker_sz))};
|
||||
OpusDecoderPtr decoder{static_cast<OpusDecoder*>(operator new(worker_sz))};
|
||||
if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) {
|
||||
LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err);
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
@@ -232,8 +270,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.PushIpcInterface<IHardwareOpusDecoderManager>(std::move(decoder), sample_rate,
|
||||
channel_count);
|
||||
rb.PushIpcInterface<IHardwareOpusDecoderManager>(
|
||||
std::make_unique<OpusDecoderState>(std::move(decoder), sample_rate, channel_count));
|
||||
}
|
||||
|
||||
HwOpus::HwOpus() : ServiceFramework("hwopus") {
|
||||
|
||||
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
|
||||
|
||||
auto& instance = Core::System::GetInstance();
|
||||
instance.GetPerfStats().EndGameFrame();
|
||||
instance.Renderer().SwapBuffers(framebuffer);
|
||||
instance.GPU().SwapBuffers(framebuffer);
|
||||
}
|
||||
|
||||
} // namespace Service::Nvidia::Devices
|
||||
|
||||
@@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
|
||||
auto& gpu = system_instance.GPU();
|
||||
auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
|
||||
ASSERT(cpu_addr);
|
||||
system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
|
||||
gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
|
||||
|
||||
params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
|
||||
|
||||
|
||||
@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void PushGPUEntries(Tegra::CommandList&& entries) {
|
||||
if (entries.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
|
||||
dma_pusher.Push(std::move(entries));
|
||||
dma_pusher.DispatchCalls();
|
||||
}
|
||||
|
||||
u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
|
||||
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
|
||||
UNIMPLEMENTED();
|
||||
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
|
||||
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
|
||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||
|
||||
PushGPUEntries(std::move(entries));
|
||||
Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
|
||||
|
||||
params.fence_out.id = 0;
|
||||
params.fence_out.value = 0;
|
||||
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
|
||||
Memory::ReadBlock(params.address, entries.data(),
|
||||
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||
|
||||
PushGPUEntries(std::move(entries));
|
||||
Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
|
||||
|
||||
params.fence_out.id = 0;
|
||||
params.fence_out.value = 0;
|
||||
|
||||
@@ -186,7 +186,7 @@ void NVFlinger::Compose() {
|
||||
|
||||
// There was no queued buffer to draw, render previous frame
|
||||
system_instance.GetPerfStats().EndGameFrame();
|
||||
system_instance.Renderer().SwapBuffers({});
|
||||
system_instance.GPU().SwapBuffers({});
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -76,7 +76,8 @@ namespace Service {
|
||||
* Creates a function string for logging, complete with the name (or header code, depending
|
||||
* on what's passed in) the port name, and all the cmd_buff arguments.
|
||||
*/
|
||||
[[maybe_unused]] static std::string MakeFunctionString(const char* name, const char* port_name,
|
||||
[[maybe_unused]] static std::string MakeFunctionString(std::string_view name,
|
||||
std::string_view port_name,
|
||||
const u32* cmd_buff) {
|
||||
// Number of params == bits 0-5 + bits 6-11
|
||||
int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F);
|
||||
@@ -158,9 +159,7 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) {
|
||||
return ReportUnimplementedFunction(ctx, info);
|
||||
}
|
||||
|
||||
LOG_TRACE(
|
||||
Service, "{}",
|
||||
MakeFunctionString(info->name, GetServiceName().c_str(), ctx.CommandBuffer()).c_str());
|
||||
LOG_TRACE(Service, "{}", MakeFunctionString(info->name, GetServiceName(), ctx.CommandBuffer()));
|
||||
handler_invoker(this, info->handler_callback, ctx);
|
||||
}
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) {
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->parent->client};
|
||||
Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client};
|
||||
rb.PushMoveObjects(session);
|
||||
|
||||
LOG_DEBUG(Service, "session={}", session->GetObjectId());
|
||||
|
||||
@@ -171,9 +171,6 @@ T Read(const VAddr vaddr) {
|
||||
return value;
|
||||
}
|
||||
|
||||
// The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
|
||||
std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
|
||||
|
||||
PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
|
||||
switch (type) {
|
||||
case PageType::Unmapped:
|
||||
@@ -204,9 +201,6 @@ void Write(const VAddr vaddr, const T data) {
|
||||
return;
|
||||
}
|
||||
|
||||
// The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
|
||||
std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
|
||||
|
||||
PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
|
||||
switch (type) {
|
||||
case PageType::Unmapped:
|
||||
@@ -362,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
|
||||
const VAddr overlap_end = std::min(end, region_end);
|
||||
const VAddr overlap_size = overlap_end - overlap_start;
|
||||
|
||||
auto& rasterizer = system_instance.Renderer().Rasterizer();
|
||||
auto& gpu = system_instance.GPU();
|
||||
switch (mode) {
|
||||
case FlushMode::Flush:
|
||||
rasterizer.FlushRegion(overlap_start, overlap_size);
|
||||
gpu.FlushRegion(overlap_start, overlap_size);
|
||||
break;
|
||||
case FlushMode::Invalidate:
|
||||
rasterizer.InvalidateRegion(overlap_start, overlap_size);
|
||||
gpu.InvalidateRegion(overlap_start, overlap_size);
|
||||
break;
|
||||
case FlushMode::FlushAndInvalidate:
|
||||
rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size);
|
||||
gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -91,7 +91,10 @@ void LogSettings() {
|
||||
LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
|
||||
LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
|
||||
LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
|
||||
LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
|
||||
LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
|
||||
LogSetting("Renderer_UseAsynchronousGpuEmulation",
|
||||
Settings::values.use_asynchronous_gpu_emulation);
|
||||
LogSetting("Audio_OutputEngine", Settings::values.sink_id);
|
||||
LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
|
||||
LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
|
||||
|
||||
@@ -393,6 +393,7 @@ struct Values {
|
||||
u16 frame_limit;
|
||||
bool use_disk_shader_cache;
|
||||
bool use_accurate_gpu_emulation;
|
||||
bool use_asynchronous_gpu_emulation;
|
||||
|
||||
float bg_red;
|
||||
float bg_green;
|
||||
|
||||
@@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() {
|
||||
Settings::values.use_disk_shader_cache);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
|
||||
Settings::values.use_accurate_gpu_emulation);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
|
||||
Settings::values.use_asynchronous_gpu_emulation);
|
||||
AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
|
||||
Settings::values.use_docked_mode);
|
||||
}
|
||||
|
||||
@@ -13,11 +13,11 @@
|
||||
namespace ArmTests {
|
||||
|
||||
TestEnvironment::TestEnvironment(bool mutable_memory_)
|
||||
: mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) {
|
||||
|
||||
: mutable_memory(mutable_memory_),
|
||||
test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
|
||||
auto process = Kernel::Process::Create(kernel, "");
|
||||
kernel.MakeCurrentProcess(process.get());
|
||||
page_table = &Core::CurrentProcess()->VMManager().page_table;
|
||||
page_table = &process->VMManager().page_table;
|
||||
|
||||
std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
|
||||
page_table->special_regions.clear();
|
||||
|
||||
@@ -17,6 +17,12 @@ add_library(video_core STATIC
|
||||
engines/shader_header.h
|
||||
gpu.cpp
|
||||
gpu.h
|
||||
gpu_asynch.cpp
|
||||
gpu_asynch.h
|
||||
gpu_synch.cpp
|
||||
gpu_synch.h
|
||||
gpu_thread.cpp
|
||||
gpu_thread.h
|
||||
macro_interpreter.cpp
|
||||
macro_interpreter.h
|
||||
memory_manager.cpp
|
||||
@@ -74,6 +80,7 @@ add_library(video_core STATIC
|
||||
shader/decode/hfma2.cpp
|
||||
shader/decode/conversion.cpp
|
||||
shader/decode/memory.cpp
|
||||
shader/decode/texture.cpp
|
||||
shader/decode/float_set_predicate.cpp
|
||||
shader/decode/integer_set_predicate.cpp
|
||||
shader/decode/half_set_predicate.cpp
|
||||
@@ -94,6 +101,8 @@ add_library(video_core STATIC
|
||||
surface.h
|
||||
textures/astc.cpp
|
||||
textures/astc.h
|
||||
textures/convert.cpp
|
||||
textures/convert.h
|
||||
textures/decoders.cpp
|
||||
textures/decoders.h
|
||||
textures/texture.h
|
||||
@@ -104,6 +113,8 @@ add_library(video_core STATIC
|
||||
if (ENABLE_VULKAN)
|
||||
target_sources(video_core PRIVATE
|
||||
renderer_vulkan/declarations.h
|
||||
renderer_vulkan/maxwell_to_vk.cpp
|
||||
renderer_vulkan/maxwell_to_vk.h
|
||||
renderer_vulkan/vk_buffer_cache.cpp
|
||||
renderer_vulkan/vk_buffer_cache.h
|
||||
renderer_vulkan/vk_device.cpp
|
||||
|
||||
@@ -39,7 +39,7 @@ bool DmaPusher::Step() {
|
||||
}
|
||||
|
||||
const CommandList& command_list{dma_pushbuffer.front()};
|
||||
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
|
||||
const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
|
||||
GPUVAddr dma_get = command_list_header.addr;
|
||||
GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
|
||||
bool non_main = command_list_header.is_non_main;
|
||||
|
||||
@@ -2,12 +2,11 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "core/core.h"
|
||||
#include "core/memory.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include "common/assert.h"
|
||||
#include <cstddef>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
@@ -2,9 +2,8 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
|
||||
@@ -5,8 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include <cstddef>
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
@@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) {
|
||||
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
|
||||
// We do this before actually writing the new data because the destination address might contain
|
||||
// a dirty surface that will have to be written back to memory.
|
||||
rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
|
||||
Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
|
||||
|
||||
Memory::Write32(*dest_address, data);
|
||||
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
@@ -91,12 +92,12 @@ void MaxwellDMA::HandleCopy() {
|
||||
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
|
||||
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
|
||||
// copying.
|
||||
rasterizer.FlushRegion(*source_cpu, src_size);
|
||||
Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
|
||||
|
||||
// We have to invalidate the destination region to evict any outdated surfaces from the
|
||||
// cache. We do this before actually writing the new data because the destination address
|
||||
// might contain a dirty surface that will have to be written back to memory.
|
||||
rasterizer.InvalidateRegion(*dest_cpu, dst_size);
|
||||
Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
|
||||
};
|
||||
|
||||
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
#include <bitset>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
@@ -325,11 +324,11 @@ enum class TextureQueryType : u64 {
|
||||
|
||||
enum class TextureProcessMode : u64 {
|
||||
None = 0,
|
||||
LZ = 1, // Unknown, appears to be the same as none.
|
||||
LZ = 1, // Load LOD of zero.
|
||||
LB = 2, // Load Bias.
|
||||
LL = 3, // Load LOD (LevelOfDetail)
|
||||
LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB
|
||||
LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL
|
||||
LL = 3, // Load LOD.
|
||||
LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
|
||||
LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
|
||||
};
|
||||
|
||||
enum class TextureMiscMode : u64 {
|
||||
@@ -1446,6 +1445,7 @@ public:
|
||||
Flow,
|
||||
Synch,
|
||||
Memory,
|
||||
Texture,
|
||||
FloatSet,
|
||||
FloatSetPredicate,
|
||||
IntegerSet,
|
||||
@@ -1576,14 +1576,14 @@ private:
|
||||
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
||||
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
|
||||
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
||||
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
|
||||
INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"),
|
||||
INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"),
|
||||
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
|
||||
INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
|
||||
INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
|
||||
INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"),
|
||||
INST("1101111101011---", Id::TMML, Type::Memory, "TMML"),
|
||||
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
|
||||
INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
|
||||
INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
|
||||
INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
|
||||
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
|
||||
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
|
||||
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
|
||||
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
|
||||
INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
|
||||
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
|
||||
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
@@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) {
|
||||
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
|
||||
auto& rasterizer{renderer.Rasterizer()};
|
||||
memory_manager = std::make_unique<Tegra::MemoryManager>();
|
||||
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
|
||||
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
|
||||
|
||||
@@ -16,8 +16,8 @@ class System;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
class RendererBase;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
@@ -119,10 +119,11 @@ enum class EngineID {
|
||||
MAXWELL_DMA_COPY_A = 0xB0B5,
|
||||
};
|
||||
|
||||
class GPU final {
|
||||
class GPU {
|
||||
public:
|
||||
explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
|
||||
~GPU();
|
||||
explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
|
||||
|
||||
virtual ~GPU();
|
||||
|
||||
struct MethodCall {
|
||||
u32 method{};
|
||||
@@ -200,8 +201,42 @@ public:
|
||||
};
|
||||
} regs{};
|
||||
|
||||
/// Push GPU command entries to be processed
|
||||
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
|
||||
|
||||
/// Swap buffers (render frame)
|
||||
virtual void SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||
virtual void FlushRegion(VAddr addr, u64 size) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
||||
|
||||
private:
|
||||
void ProcessBindMethod(const MethodCall& method_call);
|
||||
void ProcessSemaphoreTriggerMethod();
|
||||
void ProcessSemaphoreRelease();
|
||||
void ProcessSemaphoreAcquire();
|
||||
|
||||
/// Calls a GPU puller method.
|
||||
void CallPullerMethod(const MethodCall& method_call);
|
||||
|
||||
/// Calls a GPU engine method.
|
||||
void CallEngineMethod(const MethodCall& method_call);
|
||||
|
||||
/// Determines where the method should be executed.
|
||||
bool ExecuteMethodOnEngine(const MethodCall& method_call);
|
||||
|
||||
protected:
|
||||
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
|
||||
VideoCore::RendererBase& renderer;
|
||||
|
||||
private:
|
||||
std::unique_ptr<Tegra::MemoryManager> memory_manager;
|
||||
|
||||
/// Mapping of command subchannels to their bound engine ids.
|
||||
@@ -217,18 +252,6 @@ private:
|
||||
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
|
||||
/// Inline memory engine
|
||||
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
|
||||
|
||||
void ProcessBindMethod(const MethodCall& method_call);
|
||||
void ProcessSemaphoreTriggerMethod();
|
||||
void ProcessSemaphoreRelease();
|
||||
void ProcessSemaphoreAcquire();
|
||||
|
||||
// Calls a GPU puller method.
|
||||
void CallPullerMethod(const MethodCall& method_call);
|
||||
// Calls a GPU engine method.
|
||||
void CallEngineMethod(const MethodCall& method_call);
|
||||
// Determines where the method should be executed.
|
||||
bool ExecuteMethodOnEngine(const MethodCall& method_call);
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
|
||||
37
src/video_core/gpu_asynch.cpp
Normal file
37
src/video_core/gpu_asynch.cpp
Normal file
@@ -0,0 +1,37 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/gpu_asynch.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
|
||||
: Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
|
||||
|
||||
GPUAsynch::~GPUAsynch() = default;
|
||||
|
||||
void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
|
||||
gpu_thread.SubmitList(std::move(entries));
|
||||
}
|
||||
|
||||
void GPUAsynch::SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
|
||||
gpu_thread.SwapBuffers(std::move(framebuffer));
|
||||
}
|
||||
|
||||
void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
|
||||
gpu_thread.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
|
||||
gpu_thread.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
gpu_thread.FlushAndInvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
37
src/video_core/gpu_asynch.h
Normal file
37
src/video_core/gpu_asynch.h
Normal file
@@ -0,0 +1,37 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class RendererBase;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
namespace GPUThread {
|
||||
class ThreadManager;
|
||||
} // namespace GPUThread
|
||||
|
||||
/// Implementation of GPU interface that runs the GPU asynchronously
|
||||
class GPUAsynch : public Tegra::GPU {
|
||||
public:
|
||||
explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
|
||||
~GPUAsynch() override;
|
||||
|
||||
void PushGPUEntries(Tegra::CommandList&& entries) override;
|
||||
void SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
|
||||
private:
|
||||
GPUThread::ThreadManager gpu_thread;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
37
src/video_core/gpu_synch.cpp
Normal file
37
src/video_core/gpu_synch.cpp
Normal file
@@ -0,0 +1,37 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/gpu_synch.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
|
||||
: Tegra::GPU(system, renderer) {}
|
||||
|
||||
GPUSynch::~GPUSynch() = default;
|
||||
|
||||
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
|
||||
dma_pusher->Push(std::move(entries));
|
||||
dma_pusher->DispatchCalls();
|
||||
}
|
||||
|
||||
void GPUSynch::SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
|
||||
renderer.SwapBuffers(std::move(framebuffer));
|
||||
}
|
||||
|
||||
void GPUSynch::FlushRegion(VAddr addr, u64 size) {
|
||||
renderer.Rasterizer().FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
|
||||
renderer.Rasterizer().InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
29
src/video_core/gpu_synch.h
Normal file
29
src/video_core/gpu_synch.h
Normal file
@@ -0,0 +1,29 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class RendererBase;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
/// Implementation of GPU interface that runs the GPU synchronously
|
||||
class GPUSynch : public Tegra::GPU {
|
||||
public:
|
||||
explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
|
||||
~GPUSynch() override;
|
||||
|
||||
void PushGPUEntries(Tegra::CommandList&& entries) override;
|
||||
void SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
152
src/video_core/gpu_thread.cpp
Normal file
152
src/video_core/gpu_thread.cpp
Normal file
@@ -0,0 +1,152 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "core/frontend/scope_acquire_window_context.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/dma_pusher.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
namespace VideoCommon::GPUThread {
|
||||
|
||||
/// Executes a single GPU thread command
|
||||
static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
|
||||
Tegra::DmaPusher& dma_pusher) {
|
||||
if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
|
||||
dma_pusher.Push(std::move(submit_list->entries));
|
||||
dma_pusher.DispatchCalls();
|
||||
} else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
|
||||
renderer.SwapBuffers(data->framebuffer);
|
||||
} else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
|
||||
renderer.Rasterizer().FlushRegion(data->addr, data->size);
|
||||
} else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
|
||||
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
|
||||
} else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
|
||||
renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs the GPU thread
|
||||
static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
|
||||
SynchState& state) {
|
||||
|
||||
MicroProfileOnThreadCreate("GpuThread");
|
||||
|
||||
auto WaitForWakeup = [&]() {
|
||||
std::unique_lock<std::mutex> lock{state.signal_mutex};
|
||||
state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
|
||||
};
|
||||
|
||||
// Wait for first GPU command before acquiring the window context
|
||||
WaitForWakeup();
|
||||
|
||||
// If emulation was stopped during disk shader loading, abort before trying to acquire context
|
||||
if (!state.is_running) {
|
||||
return;
|
||||
}
|
||||
|
||||
Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
|
||||
|
||||
while (state.is_running) {
|
||||
if (!state.is_running) {
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
// Thread has been woken up, so make the previous write queue the next read queue
|
||||
std::lock_guard<std::mutex> lock{state.signal_mutex};
|
||||
std::swap(state.push_queue, state.pop_queue);
|
||||
}
|
||||
|
||||
// Execute all of the GPU commands
|
||||
while (!state.pop_queue->empty()) {
|
||||
ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
|
||||
state.pop_queue->pop();
|
||||
}
|
||||
|
||||
state.UpdateIdleState();
|
||||
|
||||
// Signal that the GPU thread has finished processing commands
|
||||
if (state.is_idle) {
|
||||
state.idle_condition.notify_one();
|
||||
}
|
||||
|
||||
// Wait for CPU thread to send more GPU commands
|
||||
WaitForWakeup();
|
||||
}
|
||||
}
|
||||
|
||||
ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
|
||||
: renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
|
||||
std::ref(dma_pusher), std::ref(state)},
|
||||
thread_id{thread.get_id()} {}
|
||||
|
||||
ThreadManager::~ThreadManager() {
|
||||
{
|
||||
// Notify GPU thread that a shutdown is pending
|
||||
std::lock_guard<std::mutex> lock{state.signal_mutex};
|
||||
state.is_running = false;
|
||||
}
|
||||
|
||||
state.signal_condition.notify_one();
|
||||
thread.join();
|
||||
}
|
||||
|
||||
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
|
||||
if (entries.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
PushCommand(SubmitListCommand(std::move(entries)), false, false);
|
||||
}
|
||||
|
||||
void ThreadManager::SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
|
||||
PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
|
||||
}
|
||||
|
||||
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
|
||||
// Block the CPU when using accurate emulation
|
||||
PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
|
||||
}
|
||||
|
||||
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
||||
PushCommand(InvalidateRegionCommand(addr, size), true, true);
|
||||
}
|
||||
|
||||
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{state.signal_mutex};
|
||||
|
||||
if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
|
||||
// Execute the command synchronously on the current thread
|
||||
ExecuteCommand(&command_data, renderer, dma_pusher);
|
||||
return;
|
||||
}
|
||||
|
||||
// Push the command to the GPU thread
|
||||
state.UpdateIdleState();
|
||||
state.push_queue->emplace(command_data);
|
||||
}
|
||||
|
||||
// Signal the GPU thread that commands are pending
|
||||
state.signal_condition.notify_one();
|
||||
|
||||
if (wait_for_idle) {
|
||||
// Wait for the GPU to be idle (all commands to be executed)
|
||||
std::unique_lock<std::mutex> lock{state.idle_mutex};
|
||||
state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::GPUThread
|
||||
133
src/video_core/gpu_thread.h
Normal file
133
src/video_core/gpu_thread.h
Normal file
@@ -0,0 +1,133 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <thread>
|
||||
#include <variant>
|
||||
|
||||
namespace Tegra {
|
||||
struct FramebufferConfig;
|
||||
class DmaPusher;
|
||||
} // namespace Tegra
|
||||
|
||||
namespace VideoCore {
|
||||
class RendererBase;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace VideoCommon::GPUThread {
|
||||
|
||||
/// Command to signal to the GPU thread that a command list is ready for processing
|
||||
struct SubmitListCommand final {
|
||||
explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
|
||||
|
||||
Tegra::CommandList entries;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread that a swap buffers is pending
|
||||
struct SwapBuffersCommand final {
|
||||
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
|
||||
: framebuffer{std::move(framebuffer)} {}
|
||||
|
||||
std::optional<const Tegra::FramebufferConfig> framebuffer;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread to flush a region
|
||||
struct FlushRegionCommand final {
|
||||
explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||
|
||||
const VAddr addr;
|
||||
const u64 size;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread to invalidate a region
|
||||
struct InvalidateRegionCommand final {
|
||||
explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||
|
||||
const VAddr addr;
|
||||
const u64 size;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread to flush and invalidate a region
|
||||
struct FlushAndInvalidateRegionCommand final {
|
||||
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
|
||||
: addr{addr}, size{size} {}
|
||||
|
||||
const VAddr addr;
|
||||
const u64 size;
|
||||
};
|
||||
|
||||
using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
|
||||
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
|
||||
|
||||
/// Struct used to synchronize the GPU thread
|
||||
struct SynchState final {
|
||||
std::atomic<bool> is_running{true};
|
||||
std::atomic<bool> is_idle{true};
|
||||
std::condition_variable signal_condition;
|
||||
std::mutex signal_mutex;
|
||||
std::condition_variable idle_condition;
|
||||
std::mutex idle_mutex;
|
||||
|
||||
// We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
|
||||
// one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
|
||||
// empty. This allows for efficient thread-safe access, as it does not require any copies.
|
||||
|
||||
using CommandQueue = std::queue<CommandData>;
|
||||
std::array<CommandQueue, 2> command_queues;
|
||||
CommandQueue* push_queue{&command_queues[0]};
|
||||
CommandQueue* pop_queue{&command_queues[1]};
|
||||
|
||||
void UpdateIdleState() {
|
||||
std::lock_guard<std::mutex> lock{idle_mutex};
|
||||
is_idle = command_queues[0].empty() && command_queues[1].empty();
|
||||
}
|
||||
};
|
||||
|
||||
/// Class used to manage the GPU thread
|
||||
class ThreadManager final {
|
||||
public:
|
||||
explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
|
||||
~ThreadManager();
|
||||
|
||||
/// Push GPU command entries to be processed
|
||||
void SubmitList(Tegra::CommandList&& entries);
|
||||
|
||||
/// Swap buffers (render frame)
|
||||
void SwapBuffers(
|
||||
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||
void FlushRegion(VAddr addr, u64 size);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||
void InvalidateRegion(VAddr addr, u64 size);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size);
|
||||
|
||||
private:
|
||||
/// Pushes a command to be executed by the GPU thread
|
||||
void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
|
||||
|
||||
/// Returns true if this is called by the GPU thread
|
||||
bool IsGpuThread() const {
|
||||
return std::this_thread::get_id() == thread_id;
|
||||
}
|
||||
|
||||
private:
|
||||
SynchState state;
|
||||
VideoCore::RendererBase& renderer;
|
||||
Tegra::DmaPusher& dma_pusher;
|
||||
std::thread thread;
|
||||
std::thread::id thread_id;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon::GPUThread
|
||||
@@ -2,6 +2,7 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
|
||||
@@ -118,7 +118,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::Syst
|
||||
|
||||
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
|
||||
|
||||
LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
|
||||
LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
|
||||
CheckExtensions();
|
||||
}
|
||||
|
||||
@@ -177,7 +177,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
|
||||
continue;
|
||||
|
||||
const auto& buffer = regs.vertex_array[attrib.buffer];
|
||||
LOG_TRACE(HW_GPU,
|
||||
LOG_TRACE(Render_OpenGL,
|
||||
"vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
|
||||
index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
|
||||
attrib.offset.Value(), attrib.IsNormalized());
|
||||
@@ -343,9 +343,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
shader_program_manager->UseProgrammableFragmentShader(program_handle);
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
|
||||
shader_config.enable.Value(), shader_config.offset);
|
||||
UNREACHABLE();
|
||||
UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
|
||||
shader_config.enable.Value(), shader_config.offset);
|
||||
}
|
||||
|
||||
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
|
||||
@@ -739,33 +738,17 @@ void RasterizerOpenGL::DrawArrays() {
|
||||
state.Apply();
|
||||
|
||||
res_cache.SignalPreDrawCall();
|
||||
|
||||
// Execute draw call
|
||||
params.DispatchDraw();
|
||||
|
||||
res_cache.SignalPostDrawCall();
|
||||
|
||||
// Disable scissor test
|
||||
state.viewports[0].scissor.enabled = false;
|
||||
|
||||
accelerate_draw = AccelDraw::Disabled;
|
||||
|
||||
// Unbind textures for potential future use as framebuffer attachments
|
||||
for (auto& texture_unit : state.texture_units) {
|
||||
texture_unit.Unbind();
|
||||
}
|
||||
state.Apply();
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FlushAll() {}
|
||||
|
||||
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
|
||||
if (Settings::values.use_accurate_gpu_emulation) {
|
||||
// Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
|
||||
res_cache.FlushRegion(addr, size);
|
||||
}
|
||||
res_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
||||
@@ -809,7 +792,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||
VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
|
||||
ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
|
||||
ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
|
||||
ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different");
|
||||
|
||||
if (params.pixel_format != pixel_format) {
|
||||
LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
|
||||
}
|
||||
|
||||
screen_info.display_texture = surface->Texture().handle;
|
||||
|
||||
@@ -955,8 +941,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
|
||||
size = buffer.size;
|
||||
|
||||
if (size > MaxConstbufferSize) {
|
||||
LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size,
|
||||
MaxConstbufferSize);
|
||||
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
|
||||
MaxConstbufferSize);
|
||||
size = MaxConstbufferSize;
|
||||
}
|
||||
} else {
|
||||
@@ -1016,10 +1002,9 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
|
||||
|
||||
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
|
||||
|
||||
Surface surface = res_cache.GetTextureSurface(texture, entry);
|
||||
if (surface != nullptr) {
|
||||
if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
|
||||
state.texture_units[current_bindpoint].texture =
|
||||
entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle;
|
||||
surface->Texture(entry.IsArray()).handle;
|
||||
surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
|
||||
texture.tic.w_source);
|
||||
} else {
|
||||
@@ -1251,11 +1236,7 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
|
||||
|
||||
void RasterizerOpenGL::SyncTransformFeedback() {
|
||||
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
|
||||
|
||||
if (regs.tfb_enabled != 0) {
|
||||
LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented");
|
||||
UNREACHABLE();
|
||||
}
|
||||
UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncPointState() {
|
||||
@@ -1275,12 +1256,8 @@ void RasterizerOpenGL::SyncPolygonOffset() {
|
||||
|
||||
void RasterizerOpenGL::CheckAlphaTests() {
|
||||
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
|
||||
|
||||
if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) {
|
||||
LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
|
||||
"this behavior is undefined.");
|
||||
UNREACHABLE();
|
||||
}
|
||||
UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
|
||||
"Alpha Testing is enabled with more than one rendertarget");
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
|
||||
#include "video_core/renderer_opengl/utils.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/textures/astc.h"
|
||||
#include "video_core/textures/convert.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace OpenGL {
|
||||
@@ -400,6 +400,27 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
|
||||
return format;
|
||||
}
|
||||
|
||||
/// Returns the discrepant array target
|
||||
constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) {
|
||||
switch (target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
return GL_TEXTURE_1D_ARRAY;
|
||||
case SurfaceTarget::Texture2D:
|
||||
return GL_TEXTURE_2D_ARRAY;
|
||||
case SurfaceTarget::Texture3D:
|
||||
return GL_NONE;
|
||||
case SurfaceTarget::Texture1DArray:
|
||||
return GL_TEXTURE_1D;
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
return GL_TEXTURE_2D;
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
return GL_TEXTURE_CUBE_MAP_ARRAY;
|
||||
case SurfaceTarget::TextureCubeArray:
|
||||
return GL_TEXTURE_CUBE_MAP;
|
||||
}
|
||||
return GL_NONE;
|
||||
}
|
||||
|
||||
Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
|
||||
u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
|
||||
if (IsPixelFormatASTC(pixel_format)) {
|
||||
@@ -597,103 +618,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
|
||||
union S8Z24 {
|
||||
BitField<0, 24, u32> z24;
|
||||
BitField<24, 8, u32> s8;
|
||||
};
|
||||
static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
|
||||
|
||||
union Z24S8 {
|
||||
BitField<0, 8, u32> s8;
|
||||
BitField<8, 24, u32> z24;
|
||||
};
|
||||
static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
|
||||
|
||||
S8Z24 s8z24_pixel{};
|
||||
Z24S8 z24s8_pixel{};
|
||||
constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
|
||||
for (std::size_t y = 0; y < height; ++y) {
|
||||
for (std::size_t x = 0; x < width; ++x) {
|
||||
const std::size_t offset{bpp * (y * width + x)};
|
||||
if (reverse) {
|
||||
std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
|
||||
s8z24_pixel.s8.Assign(z24s8_pixel.s8);
|
||||
s8z24_pixel.z24.Assign(z24s8_pixel.z24);
|
||||
std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
|
||||
} else {
|
||||
std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
|
||||
z24s8_pixel.s8.Assign(s8z24_pixel.s8);
|
||||
z24s8_pixel.z24.Assign(s8z24_pixel.z24);
|
||||
std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to perform software conversion (as needed) when loading a buffer from Switch
|
||||
* memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
|
||||
* typical desktop GPUs.
|
||||
*/
|
||||
static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
|
||||
u32 width, u32 height, u32 depth) {
|
||||
switch (pixel_format) {
|
||||
case PixelFormat::ASTC_2D_4X4:
|
||||
case PixelFormat::ASTC_2D_8X8:
|
||||
case PixelFormat::ASTC_2D_8X5:
|
||||
case PixelFormat::ASTC_2D_5X4:
|
||||
case PixelFormat::ASTC_2D_5X5:
|
||||
case PixelFormat::ASTC_2D_4X4_SRGB:
|
||||
case PixelFormat::ASTC_2D_8X8_SRGB:
|
||||
case PixelFormat::ASTC_2D_8X5_SRGB:
|
||||
case PixelFormat::ASTC_2D_5X4_SRGB:
|
||||
case PixelFormat::ASTC_2D_5X5_SRGB:
|
||||
case PixelFormat::ASTC_2D_10X8:
|
||||
case PixelFormat::ASTC_2D_10X8_SRGB: {
|
||||
// Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
|
||||
u32 block_width{};
|
||||
u32 block_height{};
|
||||
std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
|
||||
data =
|
||||
Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
|
||||
break;
|
||||
}
|
||||
case PixelFormat::S8Z24:
|
||||
// Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
|
||||
ConvertS8Z24ToZ24S8(data, width, height, false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
|
||||
* Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
|
||||
* with typical desktop GPUs.
|
||||
*/
|
||||
static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
|
||||
u32 width, u32 height) {
|
||||
switch (pixel_format) {
|
||||
case PixelFormat::ASTC_2D_4X4:
|
||||
case PixelFormat::ASTC_2D_8X8:
|
||||
case PixelFormat::ASTC_2D_4X4_SRGB:
|
||||
case PixelFormat::ASTC_2D_8X8_SRGB:
|
||||
case PixelFormat::ASTC_2D_5X5:
|
||||
case PixelFormat::ASTC_2D_5X5_SRGB:
|
||||
case PixelFormat::ASTC_2D_10X8:
|
||||
case PixelFormat::ASTC_2D_10X8_SRGB: {
|
||||
LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
|
||||
static_cast<u32>(pixel_format));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
case PixelFormat::S8Z24:
|
||||
// Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
|
||||
ConvertS8Z24ToZ24S8(data, width, height, true);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
|
||||
void CachedSurface::LoadGLBuffer() {
|
||||
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
|
||||
@@ -722,8 +646,16 @@ void CachedSurface::LoadGLBuffer() {
|
||||
}
|
||||
}
|
||||
for (u32 i = 0; i < params.max_mip_level; i++) {
|
||||
ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
|
||||
params.MipHeight(i), params.MipDepth(i));
|
||||
const u32 width = params.MipWidth(i);
|
||||
const u32 height = params.MipHeight(i);
|
||||
const u32 depth = params.MipDepth(i);
|
||||
if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
|
||||
// Reserve size for RGBA8 conversion
|
||||
constexpr std::size_t rgba_bpp = 4;
|
||||
gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
|
||||
}
|
||||
Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
|
||||
height, depth, true, true);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -746,8 +678,8 @@ void CachedSurface::FlushGLBuffer() {
|
||||
glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
|
||||
static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width,
|
||||
params.height);
|
||||
Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
|
||||
params.height, params.depth, true, true);
|
||||
const u8* const texture_src_data = Memory::GetPointer(params.addr);
|
||||
ASSERT(texture_src_data);
|
||||
if (params.is_tiled) {
|
||||
@@ -884,20 +816,22 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
|
||||
}
|
||||
|
||||
void CachedSurface::EnsureTextureView() {
|
||||
if (texture_view.handle != 0)
|
||||
void CachedSurface::EnsureTextureDiscrepantView() {
|
||||
if (discrepant_view.handle != 0)
|
||||
return;
|
||||
|
||||
const GLenum target{TargetLayer()};
|
||||
const GLenum target{GetArrayDiscrepantTarget(params.target)};
|
||||
ASSERT(target != GL_NONE);
|
||||
|
||||
const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
|
||||
constexpr GLuint min_layer = 0;
|
||||
constexpr GLuint min_level = 0;
|
||||
|
||||
glGenTextures(1, &texture_view.handle);
|
||||
glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level,
|
||||
glGenTextures(1, &discrepant_view.handle);
|
||||
glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level,
|
||||
params.max_mip_level, min_layer, num_layers);
|
||||
ApplyTextureDefaults(texture_view.handle, params.max_mip_level);
|
||||
glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
|
||||
ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level);
|
||||
glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
|
||||
reinterpret_cast<const GLint*>(swizzle.data()));
|
||||
}
|
||||
|
||||
@@ -923,8 +857,8 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
|
||||
swizzle = {new_x, new_y, new_z, new_w};
|
||||
const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
|
||||
glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
|
||||
if (texture_view.handle != 0) {
|
||||
glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
|
||||
if (discrepant_view.handle != 0) {
|
||||
glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -367,31 +367,19 @@ public:
|
||||
return texture;
|
||||
}
|
||||
|
||||
const OGLTexture& TextureLayer() {
|
||||
if (params.is_array) {
|
||||
return Texture();
|
||||
const OGLTexture& Texture(bool as_array) {
|
||||
if (params.is_array == as_array) {
|
||||
return texture;
|
||||
} else {
|
||||
EnsureTextureDiscrepantView();
|
||||
return discrepant_view;
|
||||
}
|
||||
EnsureTextureView();
|
||||
return texture_view;
|
||||
}
|
||||
|
||||
GLenum Target() const {
|
||||
return gl_target;
|
||||
}
|
||||
|
||||
GLenum TargetLayer() const {
|
||||
using VideoCore::Surface::SurfaceTarget;
|
||||
switch (params.target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
return GL_TEXTURE_1D_ARRAY;
|
||||
case SurfaceTarget::Texture2D:
|
||||
return GL_TEXTURE_2D_ARRAY;
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
return GL_TEXTURE_CUBE_MAP_ARRAY;
|
||||
}
|
||||
return Target();
|
||||
}
|
||||
|
||||
const SurfaceParams& GetSurfaceParams() const {
|
||||
return params;
|
||||
}
|
||||
@@ -431,10 +419,10 @@ public:
|
||||
private:
|
||||
void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
|
||||
|
||||
void EnsureTextureView();
|
||||
void EnsureTextureDiscrepantView();
|
||||
|
||||
OGLTexture texture;
|
||||
OGLTexture texture_view;
|
||||
OGLTexture discrepant_view;
|
||||
std::vector<std::vector<u8>> gl_buffer;
|
||||
SurfaceParams params{};
|
||||
GLenum gl_target{};
|
||||
|
||||
@@ -5,7 +5,9 @@
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
@@ -717,7 +719,7 @@ private:
|
||||
}
|
||||
|
||||
std::string GenerateTexture(Operation operation, const std::string& func,
|
||||
bool is_extra_int = false) {
|
||||
const std::vector<std::pair<Type, Node>>& extras) {
|
||||
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
|
||||
|
||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||
@@ -738,36 +740,47 @@ private:
|
||||
expr += Visit(operation[i]);
|
||||
|
||||
const std::size_t next = i + 1;
|
||||
if (next < count || has_array || has_shadow)
|
||||
if (next < count)
|
||||
expr += ", ";
|
||||
}
|
||||
if (has_array) {
|
||||
expr += "float(ftoi(" + Visit(meta->array) + "))";
|
||||
expr += ", float(ftoi(" + Visit(meta->array) + "))";
|
||||
}
|
||||
if (has_shadow) {
|
||||
if (has_array)
|
||||
expr += ", ";
|
||||
expr += Visit(meta->depth_compare);
|
||||
expr += ", " + Visit(meta->depth_compare);
|
||||
}
|
||||
expr += ')';
|
||||
|
||||
for (const Node extra : meta->extras) {
|
||||
for (const auto& extra_pair : extras) {
|
||||
const auto [type, operand] = extra_pair;
|
||||
if (operand == nullptr) {
|
||||
continue;
|
||||
}
|
||||
expr += ", ";
|
||||
if (is_extra_int) {
|
||||
if (const auto immediate = std::get_if<ImmediateNode>(extra)) {
|
||||
|
||||
switch (type) {
|
||||
case Type::Int:
|
||||
if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
|
||||
// Inline the string as an immediate integer in GLSL (some extra arguments are
|
||||
// required to be constant)
|
||||
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
|
||||
} else {
|
||||
expr += "ftoi(" + Visit(extra) + ')';
|
||||
expr += "ftoi(" + Visit(operand) + ')';
|
||||
}
|
||||
} else {
|
||||
expr += Visit(extra);
|
||||
break;
|
||||
case Type::Float:
|
||||
expr += Visit(operand);
|
||||
break;
|
||||
default: {
|
||||
const auto type_int = static_cast<u32>(type);
|
||||
UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
|
||||
expr += '0';
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
expr += ')';
|
||||
return expr;
|
||||
return expr + ')';
|
||||
}
|
||||
|
||||
std::string Assign(Operation operation) {
|
||||
@@ -1146,7 +1159,7 @@ private:
|
||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||
ASSERT(meta);
|
||||
|
||||
std::string expr = GenerateTexture(operation, "texture");
|
||||
std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
|
||||
if (meta->sampler.IsShadow()) {
|
||||
expr = "vec4(" + expr + ')';
|
||||
}
|
||||
@@ -1157,7 +1170,7 @@ private:
|
||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||
ASSERT(meta);
|
||||
|
||||
std::string expr = GenerateTexture(operation, "textureLod");
|
||||
std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
|
||||
if (meta->sampler.IsShadow()) {
|
||||
expr = "vec4(" + expr + ')';
|
||||
}
|
||||
@@ -1168,7 +1181,8 @@ private:
|
||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||
ASSERT(meta);
|
||||
|
||||
return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) +
|
||||
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
|
||||
return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
|
||||
GetSwizzle(meta->element);
|
||||
}
|
||||
|
||||
@@ -1197,8 +1211,8 @@ private:
|
||||
ASSERT(meta);
|
||||
|
||||
if (meta->element < 2) {
|
||||
return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" +
|
||||
GetSwizzle(meta->element) + "))";
|
||||
return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
|
||||
" * vec2(256))" + GetSwizzle(meta->element) + "))";
|
||||
}
|
||||
return "0";
|
||||
}
|
||||
@@ -1224,9 +1238,9 @@ private:
|
||||
else if (next < count)
|
||||
expr += ", ";
|
||||
}
|
||||
for (std::size_t i = 0; i < meta->extras.size(); ++i) {
|
||||
if (meta->lod) {
|
||||
expr += ", ";
|
||||
expr += CastOperand(Visit(meta->extras.at(i)), Type::Int);
|
||||
expr += CastOperand(Visit(meta->lod), Type::Int);
|
||||
}
|
||||
expr += ')';
|
||||
|
||||
|
||||
@@ -461,7 +461,7 @@ void OpenGLState::ApplyTextures() const {
|
||||
|
||||
if (has_delta) {
|
||||
glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
|
||||
textures.data());
|
||||
textures.data() + first);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -482,7 +482,7 @@ void OpenGLState::ApplySamplers() const {
|
||||
}
|
||||
if (has_delta) {
|
||||
glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
|
||||
samplers.data());
|
||||
samplers.data() + first);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -244,6 +244,21 @@ void RendererOpenGL::InitOpenGLObjects() {
|
||||
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
|
||||
}
|
||||
|
||||
void RendererOpenGL::AddTelemetryFields() {
|
||||
const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
|
||||
const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
|
||||
const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
|
||||
|
||||
LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
|
||||
LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
|
||||
LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
|
||||
|
||||
auto& telemetry_session = system.TelemetrySession();
|
||||
telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
|
||||
telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
|
||||
telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
|
||||
}
|
||||
|
||||
void RendererOpenGL::CreateRasterizer() {
|
||||
if (rasterizer) {
|
||||
return;
|
||||
@@ -466,17 +481,7 @@ bool RendererOpenGL::Init() {
|
||||
glDebugMessageCallback(DebugHandler, nullptr);
|
||||
}
|
||||
|
||||
const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
|
||||
const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
|
||||
const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
|
||||
|
||||
LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
|
||||
LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
|
||||
LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
|
||||
|
||||
Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
|
||||
Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
|
||||
Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
|
||||
AddTelemetryFields();
|
||||
|
||||
if (!GLAD_GL_VERSION_4_3) {
|
||||
return false;
|
||||
|
||||
@@ -60,6 +60,7 @@ public:
|
||||
|
||||
private:
|
||||
void InitOpenGLObjects();
|
||||
void AddTelemetryFields();
|
||||
void CreateRasterizer();
|
||||
|
||||
void ConfigureFramebufferTexture(TextureInfo& texture,
|
||||
|
||||
483
src/video_core/renderer_vulkan/maxwell_to_vk.cpp
Normal file
483
src/video_core/renderer_vulkan/maxwell_to_vk.cpp
Normal file
@@ -0,0 +1,483 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/declarations.h"
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_device.h"
|
||||
#include "video_core/surface.h"
|
||||
|
||||
namespace Vulkan::MaxwellToVK {
|
||||
|
||||
namespace Sampler {
|
||||
|
||||
vk::Filter Filter(Tegra::Texture::TextureFilter filter) {
|
||||
switch (filter) {
|
||||
case Tegra::Texture::TextureFilter::Linear:
|
||||
return vk::Filter::eLinear;
|
||||
case Tegra::Texture::TextureFilter::Nearest:
|
||||
return vk::Filter::eNearest;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
|
||||
switch (mipmap_filter) {
|
||||
case Tegra::Texture::TextureMipmapFilter::None:
|
||||
// TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
|
||||
// (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
|
||||
// use an image view with a single mipmap level to emulate this.
|
||||
return vk::SamplerMipmapMode::eLinear;
|
||||
case Tegra::Texture::TextureMipmapFilter::Linear:
|
||||
return vk::SamplerMipmapMode::eLinear;
|
||||
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
||||
return vk::SamplerMipmapMode::eNearest;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
|
||||
switch (wrap_mode) {
|
||||
case Tegra::Texture::WrapMode::Wrap:
|
||||
return vk::SamplerAddressMode::eRepeat;
|
||||
case Tegra::Texture::WrapMode::Mirror:
|
||||
return vk::SamplerAddressMode::eMirroredRepeat;
|
||||
case Tegra::Texture::WrapMode::ClampToEdge:
|
||||
return vk::SamplerAddressMode::eClampToEdge;
|
||||
case Tegra::Texture::WrapMode::Border:
|
||||
return vk::SamplerAddressMode::eClampToBorder;
|
||||
case Tegra::Texture::WrapMode::ClampOGL:
|
||||
// TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
|
||||
// eClampToBorder to get the border color of the texture, and then sample the edge to
|
||||
// manually mix them. However the shader part of this is not yet implemented.
|
||||
return vk::SamplerAddressMode::eClampToBorder;
|
||||
case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
|
||||
return vk::SamplerAddressMode::eMirrorClampToEdge;
|
||||
case Tegra::Texture::WrapMode::MirrorOnceBorder:
|
||||
UNIMPLEMENTED();
|
||||
return vk::SamplerAddressMode::eMirrorClampToEdge;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
|
||||
switch (depth_compare_func) {
|
||||
case Tegra::Texture::DepthCompareFunc::Never:
|
||||
return vk::CompareOp::eNever;
|
||||
case Tegra::Texture::DepthCompareFunc::Less:
|
||||
return vk::CompareOp::eLess;
|
||||
case Tegra::Texture::DepthCompareFunc::LessEqual:
|
||||
return vk::CompareOp::eLessOrEqual;
|
||||
case Tegra::Texture::DepthCompareFunc::Equal:
|
||||
return vk::CompareOp::eEqual;
|
||||
case Tegra::Texture::DepthCompareFunc::NotEqual:
|
||||
return vk::CompareOp::eNotEqual;
|
||||
case Tegra::Texture::DepthCompareFunc::Greater:
|
||||
return vk::CompareOp::eGreater;
|
||||
case Tegra::Texture::DepthCompareFunc::GreaterEqual:
|
||||
return vk::CompareOp::eGreaterOrEqual;
|
||||
case Tegra::Texture::DepthCompareFunc::Always:
|
||||
return vk::CompareOp::eAlways;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
|
||||
static_cast<u32>(depth_compare_func));
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace Sampler
|
||||
|
||||
struct FormatTuple {
|
||||
vk::Format format; ///< Vulkan format
|
||||
ComponentType component_type; ///< Abstracted component type
|
||||
bool attachable; ///< True when this format can be used as an attachment
|
||||
};
|
||||
|
||||
static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
|
||||
{vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI
|
||||
{vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U
|
||||
{vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U
|
||||
{vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16F
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI
|
||||
{vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1
|
||||
{vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23
|
||||
{vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45
|
||||
{vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F
|
||||
{vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4
|
||||
|
||||
// Compressed sRGB formats
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
|
||||
|
||||
// Depth formats
|
||||
{vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
|
||||
{vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16
|
||||
|
||||
// DepthStencil formats
|
||||
{vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
|
||||
{vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
|
||||
{vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8
|
||||
}};
|
||||
|
||||
static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
|
||||
return pixel_format >= PixelFormat::MaxColorFormat &&
|
||||
pixel_format < PixelFormat::MaxDepthStencilFormat;
|
||||
}
|
||||
|
||||
std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
|
||||
PixelFormat pixel_format, ComponentType component_type) {
|
||||
ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
|
||||
|
||||
const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
|
||||
UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
|
||||
"Unimplemented texture format with pixel format={} and component type={}",
|
||||
static_cast<u32>(pixel_format), static_cast<u32>(component_type));
|
||||
ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
|
||||
|
||||
auto usage = vk::FormatFeatureFlagBits::eSampledImage |
|
||||
vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
|
||||
if (tuple.attachable) {
|
||||
usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
|
||||
: vk::FormatFeatureFlagBits::eColorAttachment;
|
||||
}
|
||||
return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
|
||||
}
|
||||
|
||||
vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) {
|
||||
switch (stage) {
|
||||
case Maxwell::ShaderStage::Vertex:
|
||||
return vk::ShaderStageFlagBits::eVertex;
|
||||
case Maxwell::ShaderStage::TesselationControl:
|
||||
return vk::ShaderStageFlagBits::eTessellationControl;
|
||||
case Maxwell::ShaderStage::TesselationEval:
|
||||
return vk::ShaderStageFlagBits::eTessellationEvaluation;
|
||||
case Maxwell::ShaderStage::Geometry:
|
||||
return vk::ShaderStageFlagBits::eGeometry;
|
||||
case Maxwell::ShaderStage::Fragment:
|
||||
return vk::ShaderStageFlagBits::eFragment;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
|
||||
switch (topology) {
|
||||
case Maxwell::PrimitiveTopology::Points:
|
||||
return vk::PrimitiveTopology::ePointList;
|
||||
case Maxwell::PrimitiveTopology::Lines:
|
||||
return vk::PrimitiveTopology::eLineList;
|
||||
case Maxwell::PrimitiveTopology::LineStrip:
|
||||
return vk::PrimitiveTopology::eLineStrip;
|
||||
case Maxwell::PrimitiveTopology::Triangles:
|
||||
return vk::PrimitiveTopology::eTriangleList;
|
||||
case Maxwell::PrimitiveTopology::TriangleStrip:
|
||||
return vk::PrimitiveTopology::eTriangleStrip;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
|
||||
switch (type) {
|
||||
case Maxwell::VertexAttribute::Type::SignedNorm:
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::UnsignedNorm:
|
||||
switch (size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
|
||||
return vk::Format::eR8G8B8A8Unorm;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::SignedInt:
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::UnsignedInt:
|
||||
switch (size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_32:
|
||||
return vk::Format::eR32Uint;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
case Maxwell::VertexAttribute::Type::UnsignedScaled:
|
||||
case Maxwell::VertexAttribute::Type::SignedScaled:
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::Float:
|
||||
switch (size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
|
||||
return vk::Format::eR32G32B32A32Sfloat;
|
||||
case Maxwell::VertexAttribute::Size::Size_32_32_32:
|
||||
return vk::Format::eR32G32B32Sfloat;
|
||||
case Maxwell::VertexAttribute::Size::Size_32_32:
|
||||
return vk::Format::eR32G32Sfloat;
|
||||
case Maxwell::VertexAttribute::Size::Size_32:
|
||||
return vk::Format::eR32Sfloat;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type),
|
||||
static_cast<u32>(size));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
|
||||
switch (comparison) {
|
||||
case Maxwell::ComparisonOp::Never:
|
||||
case Maxwell::ComparisonOp::NeverOld:
|
||||
return vk::CompareOp::eNever;
|
||||
case Maxwell::ComparisonOp::Less:
|
||||
case Maxwell::ComparisonOp::LessOld:
|
||||
return vk::CompareOp::eLess;
|
||||
case Maxwell::ComparisonOp::Equal:
|
||||
case Maxwell::ComparisonOp::EqualOld:
|
||||
return vk::CompareOp::eEqual;
|
||||
case Maxwell::ComparisonOp::LessEqual:
|
||||
case Maxwell::ComparisonOp::LessEqualOld:
|
||||
return vk::CompareOp::eLessOrEqual;
|
||||
case Maxwell::ComparisonOp::Greater:
|
||||
case Maxwell::ComparisonOp::GreaterOld:
|
||||
return vk::CompareOp::eGreater;
|
||||
case Maxwell::ComparisonOp::NotEqual:
|
||||
case Maxwell::ComparisonOp::NotEqualOld:
|
||||
return vk::CompareOp::eNotEqual;
|
||||
case Maxwell::ComparisonOp::GreaterEqual:
|
||||
case Maxwell::ComparisonOp::GreaterEqualOld:
|
||||
return vk::CompareOp::eGreaterOrEqual;
|
||||
case Maxwell::ComparisonOp::Always:
|
||||
case Maxwell::ComparisonOp::AlwaysOld:
|
||||
return vk::CompareOp::eAlways;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
|
||||
switch (index_format) {
|
||||
case Maxwell::IndexFormat::UnsignedByte:
|
||||
UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
|
||||
return vk::IndexType::eUint16;
|
||||
case Maxwell::IndexFormat::UnsignedShort:
|
||||
return vk::IndexType::eUint16;
|
||||
case Maxwell::IndexFormat::UnsignedInt:
|
||||
return vk::IndexType::eUint32;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) {
|
||||
switch (stencil_op) {
|
||||
case Maxwell::StencilOp::Keep:
|
||||
case Maxwell::StencilOp::KeepOGL:
|
||||
return vk::StencilOp::eKeep;
|
||||
case Maxwell::StencilOp::Zero:
|
||||
case Maxwell::StencilOp::ZeroOGL:
|
||||
return vk::StencilOp::eZero;
|
||||
case Maxwell::StencilOp::Replace:
|
||||
case Maxwell::StencilOp::ReplaceOGL:
|
||||
return vk::StencilOp::eReplace;
|
||||
case Maxwell::StencilOp::Incr:
|
||||
case Maxwell::StencilOp::IncrOGL:
|
||||
return vk::StencilOp::eIncrementAndClamp;
|
||||
case Maxwell::StencilOp::Decr:
|
||||
case Maxwell::StencilOp::DecrOGL:
|
||||
return vk::StencilOp::eDecrementAndClamp;
|
||||
case Maxwell::StencilOp::Invert:
|
||||
case Maxwell::StencilOp::InvertOGL:
|
||||
return vk::StencilOp::eInvert;
|
||||
case Maxwell::StencilOp::IncrWrap:
|
||||
case Maxwell::StencilOp::IncrWrapOGL:
|
||||
return vk::StencilOp::eIncrementAndWrap;
|
||||
case Maxwell::StencilOp::DecrWrap:
|
||||
case Maxwell::StencilOp::DecrWrapOGL:
|
||||
return vk::StencilOp::eDecrementAndWrap;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) {
|
||||
switch (equation) {
|
||||
case Maxwell::Blend::Equation::Add:
|
||||
case Maxwell::Blend::Equation::AddGL:
|
||||
return vk::BlendOp::eAdd;
|
||||
case Maxwell::Blend::Equation::Subtract:
|
||||
case Maxwell::Blend::Equation::SubtractGL:
|
||||
return vk::BlendOp::eSubtract;
|
||||
case Maxwell::Blend::Equation::ReverseSubtract:
|
||||
case Maxwell::Blend::Equation::ReverseSubtractGL:
|
||||
return vk::BlendOp::eReverseSubtract;
|
||||
case Maxwell::Blend::Equation::Min:
|
||||
case Maxwell::Blend::Equation::MinGL:
|
||||
return vk::BlendOp::eMin;
|
||||
case Maxwell::Blend::Equation::Max:
|
||||
case Maxwell::Blend::Equation::MaxGL:
|
||||
return vk::BlendOp::eMax;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
|
||||
switch (factor) {
|
||||
case Maxwell::Blend::Factor::Zero:
|
||||
case Maxwell::Blend::Factor::ZeroGL:
|
||||
return vk::BlendFactor::eZero;
|
||||
case Maxwell::Blend::Factor::One:
|
||||
case Maxwell::Blend::Factor::OneGL:
|
||||
return vk::BlendFactor::eOne;
|
||||
case Maxwell::Blend::Factor::SourceColor:
|
||||
case Maxwell::Blend::Factor::SourceColorGL:
|
||||
return vk::BlendFactor::eSrcColor;
|
||||
case Maxwell::Blend::Factor::OneMinusSourceColor:
|
||||
case Maxwell::Blend::Factor::OneMinusSourceColorGL:
|
||||
return vk::BlendFactor::eOneMinusSrcColor;
|
||||
case Maxwell::Blend::Factor::SourceAlpha:
|
||||
case Maxwell::Blend::Factor::SourceAlphaGL:
|
||||
return vk::BlendFactor::eSrcAlpha;
|
||||
case Maxwell::Blend::Factor::OneMinusSourceAlpha:
|
||||
case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
|
||||
return vk::BlendFactor::eOneMinusSrcAlpha;
|
||||
case Maxwell::Blend::Factor::DestAlpha:
|
||||
case Maxwell::Blend::Factor::DestAlphaGL:
|
||||
return vk::BlendFactor::eDstAlpha;
|
||||
case Maxwell::Blend::Factor::OneMinusDestAlpha:
|
||||
case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
|
||||
return vk::BlendFactor::eOneMinusDstAlpha;
|
||||
case Maxwell::Blend::Factor::DestColor:
|
||||
case Maxwell::Blend::Factor::DestColorGL:
|
||||
return vk::BlendFactor::eDstColor;
|
||||
case Maxwell::Blend::Factor::OneMinusDestColor:
|
||||
case Maxwell::Blend::Factor::OneMinusDestColorGL:
|
||||
return vk::BlendFactor::eOneMinusDstColor;
|
||||
case Maxwell::Blend::Factor::SourceAlphaSaturate:
|
||||
case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
|
||||
return vk::BlendFactor::eSrcAlphaSaturate;
|
||||
case Maxwell::Blend::Factor::Source1Color:
|
||||
case Maxwell::Blend::Factor::Source1ColorGL:
|
||||
return vk::BlendFactor::eSrc1Color;
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Color:
|
||||
case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
|
||||
return vk::BlendFactor::eOneMinusSrc1Color;
|
||||
case Maxwell::Blend::Factor::Source1Alpha:
|
||||
case Maxwell::Blend::Factor::Source1AlphaGL:
|
||||
return vk::BlendFactor::eSrc1Alpha;
|
||||
case Maxwell::Blend::Factor::OneMinusSource1Alpha:
|
||||
case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
|
||||
return vk::BlendFactor::eOneMinusSrc1Alpha;
|
||||
case Maxwell::Blend::Factor::ConstantColor:
|
||||
case Maxwell::Blend::Factor::ConstantColorGL:
|
||||
return vk::BlendFactor::eConstantColor;
|
||||
case Maxwell::Blend::Factor::OneMinusConstantColor:
|
||||
case Maxwell::Blend::Factor::OneMinusConstantColorGL:
|
||||
return vk::BlendFactor::eOneMinusConstantColor;
|
||||
case Maxwell::Blend::Factor::ConstantAlpha:
|
||||
case Maxwell::Blend::Factor::ConstantAlphaGL:
|
||||
return vk::BlendFactor::eConstantAlpha;
|
||||
case Maxwell::Blend::Factor::OneMinusConstantAlpha:
|
||||
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
|
||||
return vk::BlendFactor::eOneMinusConstantAlpha;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) {
|
||||
switch (front_face) {
|
||||
case Maxwell::Cull::FrontFace::ClockWise:
|
||||
return vk::FrontFace::eClockwise;
|
||||
case Maxwell::Cull::FrontFace::CounterClockWise:
|
||||
return vk::FrontFace::eCounterClockwise;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) {
|
||||
switch (cull_face) {
|
||||
case Maxwell::Cull::CullFace::Front:
|
||||
return vk::CullModeFlagBits::eFront;
|
||||
case Maxwell::Cull::CullFace::Back:
|
||||
return vk::CullModeFlagBits::eBack;
|
||||
case Maxwell::Cull::CullFace::FrontAndBack:
|
||||
return vk::CullModeFlagBits::eFrontAndBack;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
|
||||
switch (swizzle) {
|
||||
case Tegra::Texture::SwizzleSource::Zero:
|
||||
return vk::ComponentSwizzle::eZero;
|
||||
case Tegra::Texture::SwizzleSource::R:
|
||||
return vk::ComponentSwizzle::eR;
|
||||
case Tegra::Texture::SwizzleSource::G:
|
||||
return vk::ComponentSwizzle::eG;
|
||||
case Tegra::Texture::SwizzleSource::B:
|
||||
return vk::ComponentSwizzle::eB;
|
||||
case Tegra::Texture::SwizzleSource::A:
|
||||
return vk::ComponentSwizzle::eA;
|
||||
case Tegra::Texture::SwizzleSource::OneInt:
|
||||
case Tegra::Texture::SwizzleSource::OneFloat:
|
||||
return vk::ComponentSwizzle::eOne;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace Vulkan::MaxwellToVK
|
||||
58
src/video_core/renderer_vulkan/maxwell_to_vk.h
Normal file
58
src/video_core/renderer_vulkan/maxwell_to_vk.h
Normal file
@@ -0,0 +1,58 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/declarations.h"
|
||||
#include "video_core/renderer_vulkan/vk_device.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Vulkan::MaxwellToVK {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using PixelFormat = VideoCore::Surface::PixelFormat;
|
||||
using ComponentType = VideoCore::Surface::ComponentType;
|
||||
|
||||
namespace Sampler {
|
||||
|
||||
vk::Filter Filter(Tegra::Texture::TextureFilter filter);
|
||||
|
||||
vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
|
||||
|
||||
vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
|
||||
|
||||
vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
|
||||
|
||||
} // namespace Sampler
|
||||
|
||||
std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
|
||||
PixelFormat pixel_format, ComponentType component_type);
|
||||
|
||||
vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);
|
||||
|
||||
vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
|
||||
|
||||
vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
|
||||
|
||||
vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
|
||||
|
||||
vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
|
||||
|
||||
vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);
|
||||
|
||||
vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
|
||||
|
||||
vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
|
||||
|
||||
vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face);
|
||||
|
||||
vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face);
|
||||
|
||||
vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
|
||||
|
||||
} // namespace Vulkan::MaxwellToVK
|
||||
@@ -8,7 +8,7 @@
|
||||
#include <tuple>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "core/core.h"
|
||||
#include "common/assert.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/renderer_vulkan/declarations.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
|
||||
@@ -122,8 +122,7 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
|
||||
FormatType format_type) const {
|
||||
const auto it = format_properties.find(wanted_format);
|
||||
if (it == format_properties.end()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}",
|
||||
static_cast<u32>(wanted_format));
|
||||
LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
|
||||
UNREACHABLE();
|
||||
return true;
|
||||
}
|
||||
@@ -219,11 +218,19 @@ std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
|
||||
format_properties.emplace(format, physical.getFormatProperties(format, dldi));
|
||||
};
|
||||
AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
|
||||
AddFormatQuery(vk::Format::eR5G6B5UnormPack16);
|
||||
AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
|
||||
AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
|
||||
AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
|
||||
AddFormatQuery(vk::Format::eR8Unorm);
|
||||
AddFormatQuery(vk::Format::eD32Sfloat);
|
||||
AddFormatQuery(vk::Format::eD16Unorm);
|
||||
AddFormatQuery(vk::Format::eD16UnormS8Uint);
|
||||
AddFormatQuery(vk::Format::eD24UnormS8Uint);
|
||||
AddFormatQuery(vk::Format::eD32SfloatS8Uint);
|
||||
AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
|
||||
AddFormatQuery(vk::Format::eBc2UnormBlock);
|
||||
AddFormatQuery(vk::Format::eBc3UnormBlock);
|
||||
AddFormatQuery(vk::Format::eBc4UnormBlock);
|
||||
|
||||
return format_properties;
|
||||
}
|
||||
|
||||
@@ -165,6 +165,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
|
||||
{OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
|
||||
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
|
||||
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
|
||||
{OpCode::Type::Texture, &ShaderIR::DecodeTexture},
|
||||
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
|
||||
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
|
||||
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
|
||||
|
||||
@@ -17,24 +17,6 @@ using Tegra::Shader::Attribute;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Register;
|
||||
using Tegra::Shader::TextureMiscMode;
|
||||
using Tegra::Shader::TextureProcessMode;
|
||||
using Tegra::Shader::TextureType;
|
||||
|
||||
static std::size_t GetCoordCount(TextureType texture_type) {
|
||||
switch (texture_type) {
|
||||
case TextureType::Texture1D:
|
||||
return 1;
|
||||
case TextureType::Texture2D:
|
||||
return 2;
|
||||
case TextureType::Texture3D:
|
||||
case TextureType::TextureCube:
|
||||
return 3;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
@@ -247,194 +229,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TEX: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
|
||||
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const TextureType texture_type{instr.tex.texture_type};
|
||||
const bool is_array = instr.tex.array != 0;
|
||||
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
|
||||
const auto process_mode = instr.tex.GetTextureProcessMode();
|
||||
WriteTexInstructionFloat(
|
||||
bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TEXS: {
|
||||
const TextureType texture_type{instr.texs.GetTextureType()};
|
||||
const bool is_array{instr.texs.IsArrayTexture()};
|
||||
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
|
||||
const auto process_mode = instr.texs.GetTextureProcessMode();
|
||||
|
||||
if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const Node4 components =
|
||||
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
|
||||
|
||||
if (instr.texs.fp32_flag) {
|
||||
WriteTexsInstructionFloat(bb, instr, components);
|
||||
} else {
|
||||
WriteTexsInstructionHalfFloat(bb, instr, components);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLD4: {
|
||||
ASSERT(instr.tld4.array == 0);
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
|
||||
"NDV is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
|
||||
"PTP is not implemented");
|
||||
|
||||
if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const auto texture_type = instr.tld4.texture_type.Value();
|
||||
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
|
||||
const bool is_array = instr.tld4.array != 0;
|
||||
WriteTexInstructionFloat(bb, instr,
|
||||
GetTld4Code(instr, texture_type, depth_compare, is_array));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLD4S: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
|
||||
const Node op_a = GetRegister(instr.gpr8);
|
||||
const Node op_b = GetRegister(instr.gpr20);
|
||||
|
||||
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
|
||||
std::vector<Node> coords;
|
||||
if (depth_compare) {
|
||||
// Note: TLD4S coordinate encoding works just like TEXS's
|
||||
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
|
||||
coords.push_back(op_a);
|
||||
coords.push_back(op_y);
|
||||
coords.push_back(op_b);
|
||||
} else {
|
||||
coords.push_back(op_a);
|
||||
coords.push_back(op_b);
|
||||
}
|
||||
std::vector<Node> extras;
|
||||
extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
|
||||
|
||||
const auto& sampler =
|
||||
GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, {}, {}, extras, element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
WriteTexsInstructionFloat(bb, instr, values);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TXQ: {
|
||||
if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
// TODO: The new commits on the texture refactor, change the way samplers work.
|
||||
// Sadly, not all texture instructions specify the type of texture their sampler
|
||||
// uses. This must be fixed at a later instance.
|
||||
const auto& sampler =
|
||||
GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
|
||||
|
||||
u32 indexer = 0;
|
||||
switch (instr.txq.query_type) {
|
||||
case Tegra::Shader::TextureQueryType::Dimension: {
|
||||
for (u32 element = 0; element < 4; ++element) {
|
||||
if (!instr.txq.IsComponentEnabled(element)) {
|
||||
continue;
|
||||
}
|
||||
MetaTexture meta{sampler, {}, {}, {}, element};
|
||||
const Node value =
|
||||
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
|
||||
SetTemporal(bb, indexer++, value);
|
||||
}
|
||||
for (u32 i = 0; i < indexer; ++i) {
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
|
||||
static_cast<u32>(instr.txq.query_type.Value()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TMML: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
|
||||
"NDV is not implemented");
|
||||
|
||||
if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
auto texture_type = instr.tmml.texture_type.Value();
|
||||
const bool is_array = instr.tmml.array != 0;
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
|
||||
|
||||
std::vector<Node> coords;
|
||||
|
||||
// TODO: Add coordinates for different samplers once other texture types are implemented.
|
||||
switch (texture_type) {
|
||||
case TextureType::Texture1D:
|
||||
coords.push_back(GetRegister(instr.gpr8));
|
||||
break;
|
||||
case TextureType::Texture2D:
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
|
||||
|
||||
// Fallback to interpreting as a 2D texture for now
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
|
||||
texture_type = TextureType::Texture2D;
|
||||
}
|
||||
|
||||
for (u32 element = 0; element < 2; ++element) {
|
||||
auto params = coords;
|
||||
MetaTexture meta{sampler, {}, {}, {}, element};
|
||||
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
|
||||
SetTemporal(bb, element, value);
|
||||
}
|
||||
for (u32 element = 0; element < 2; ++element) {
|
||||
SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLDS: {
|
||||
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
|
||||
const bool is_array{instr.tlds.IsArrayTexture()};
|
||||
|
||||
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
|
||||
|
||||
if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
|
||||
}
|
||||
@@ -442,291 +236,4 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
|
||||
bool is_array, bool is_shadow) {
|
||||
const auto offset = static_cast<std::size_t>(sampler.index.Value());
|
||||
|
||||
// If this sampler has already been used, return the existing mapping.
|
||||
const auto itr =
|
||||
std::find_if(used_samplers.begin(), used_samplers.end(),
|
||||
[&](const Sampler& entry) { return entry.GetOffset() == offset; });
|
||||
if (itr != used_samplers.end()) {
|
||||
ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
|
||||
itr->IsShadow() == is_shadow);
|
||||
return *itr;
|
||||
}
|
||||
|
||||
// Otherwise create a new mapping for this sampler
|
||||
const std::size_t next_index = used_samplers.size();
|
||||
const Sampler entry{offset, next_index, type, is_array, is_shadow};
|
||||
return *used_samplers.emplace(entry).first;
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
|
||||
u32 dest_elem = 0;
|
||||
for (u32 elem = 0; elem < 4; ++elem) {
|
||||
if (!instr.tex.IsComponentEnabled(elem)) {
|
||||
// Skip disabled components
|
||||
continue;
|
||||
}
|
||||
SetTemporal(bb, dest_elem++, components[elem]);
|
||||
}
|
||||
// After writing values in temporals, move them to the real registers
|
||||
for (u32 i = 0; i < dest_elem; ++i) {
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
|
||||
const Node4& components) {
|
||||
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
|
||||
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
|
||||
|
||||
u32 dest_elem = 0;
|
||||
for (u32 component = 0; component < 4; ++component) {
|
||||
if (!instr.texs.IsComponentEnabled(component))
|
||||
continue;
|
||||
SetTemporal(bb, dest_elem++, components[component]);
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < dest_elem; ++i) {
|
||||
if (i < 2) {
|
||||
// Write the first two swizzle components to gpr0 and gpr0+1
|
||||
SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
|
||||
} else {
|
||||
ASSERT(instr.texs.HasTwoDestinations());
|
||||
// Write the rest of the swizzle components to gpr28 and gpr28+1
|
||||
SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
|
||||
const Node4& components) {
|
||||
// TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
|
||||
// float instruction).
|
||||
|
||||
Node4 values;
|
||||
u32 dest_elem = 0;
|
||||
for (u32 component = 0; component < 4; ++component) {
|
||||
if (!instr.texs.IsComponentEnabled(component))
|
||||
continue;
|
||||
values[dest_elem++] = components[component];
|
||||
}
|
||||
if (dest_elem == 0)
|
||||
return;
|
||||
|
||||
std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
|
||||
|
||||
const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
|
||||
if (dest_elem <= 2) {
|
||||
SetRegister(bb, instr.gpr0, first_value);
|
||||
return;
|
||||
}
|
||||
|
||||
SetTemporal(bb, 0, first_value);
|
||||
SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
|
||||
|
||||
SetRegister(bb, instr.gpr0, GetTemporal(0));
|
||||
SetRegister(bb, instr.gpr28, GetTemporal(1));
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, std::vector<Node> coords,
|
||||
Node array, Node depth_compare, u32 bias_offset) {
|
||||
const bool is_array = array;
|
||||
const bool is_shadow = depth_compare;
|
||||
|
||||
UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
|
||||
(texture_type == TextureType::TextureCube && is_array && is_shadow),
|
||||
"This method is not supported.");
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
|
||||
|
||||
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
|
||||
process_mode == TextureProcessMode::LL ||
|
||||
process_mode == TextureProcessMode::LLA;
|
||||
|
||||
// LOD selection (either via bias or explicit textureLod) not supported in GL for
|
||||
// sampler2DArrayShadow and samplerCubeArrayShadow.
|
||||
const bool gl_lod_supported =
|
||||
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
|
||||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
|
||||
|
||||
const OperationCode read_method =
|
||||
lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;
|
||||
|
||||
UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
|
||||
|
||||
std::vector<Node> extras;
|
||||
if (process_mode != TextureProcessMode::None && gl_lod_supported) {
|
||||
if (process_mode == TextureProcessMode::LZ) {
|
||||
extras.push_back(Immediate(0.0f));
|
||||
} else {
|
||||
// If present, lod or bias are always stored in the register indexed by the gpr20
|
||||
// field with an offset depending on the usage of the other registers
|
||||
extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
|
||||
}
|
||||
}
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto copy_coords = coords;
|
||||
MetaTexture meta{sampler, array, depth_compare, extras, element};
|
||||
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
||||
}
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
|
||||
const bool lod_bias_enabled =
|
||||
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
|
||||
|
||||
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
||||
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
|
||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
coords.push_back(GetRegister(coord_register + i));
|
||||
}
|
||||
// 1D.DC in OpenGL the 2nd component is ignored.
|
||||
if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
|
||||
coords.push_back(Immediate(0.0f));
|
||||
}
|
||||
|
||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||
|
||||
Node dc{};
|
||||
if (depth_compare) {
|
||||
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
|
||||
// or bias are used
|
||||
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
|
||||
dc = GetRegister(depth_register);
|
||||
}
|
||||
|
||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
|
||||
const bool lod_bias_enabled =
|
||||
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
|
||||
|
||||
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
||||
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
|
||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
const u64 last_coord_register =
|
||||
(is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
|
||||
? static_cast<u64>(instr.gpr20.Value())
|
||||
: coord_register + 1;
|
||||
const u32 bias_offset = coord_count > 2 ? 1 : 0;
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
const bool last = (i == (coord_count - 1)) && (coord_count > 1);
|
||||
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
|
||||
}
|
||||
|
||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||
|
||||
Node dc{};
|
||||
if (depth_compare) {
|
||||
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
|
||||
// or bias are used
|
||||
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
|
||||
dc = GetRegister(depth_register);
|
||||
}
|
||||
|
||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
|
||||
bool is_array) {
|
||||
const std::size_t coord_count = GetCoordCount(texture_type);
|
||||
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
|
||||
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
|
||||
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
|
||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (size_t i = 0; i < coord_count; ++i)
|
||||
coords.push_back(GetRegister(coord_register + i));
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
|
||||
const std::size_t type_coord_count = GetCoordCount(texture_type);
|
||||
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
|
||||
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// if is array gpr20 is used
|
||||
const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
|
||||
|
||||
const u64 last_coord_register =
|
||||
((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
|
||||
? static_cast<u64>(instr.gpr20.Value())
|
||||
: coord_register + 1;
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < type_coord_count; ++i) {
|
||||
const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
|
||||
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
|
||||
}
|
||||
|
||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||
// When lod is used always is in gpr20
|
||||
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, array, {}, {lod}, element};
|
||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||
}
|
||||
return values;
|
||||
}
|
||||
|
||||
std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
|
||||
TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
|
||||
std::size_t max_coords, std::size_t max_inputs) {
|
||||
const std::size_t coord_count = GetCoordCount(texture_type);
|
||||
|
||||
std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
|
||||
const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
|
||||
if (total_coord_count > max_coords || total_reg_count > max_inputs) {
|
||||
UNIMPLEMENTED_MSG("Unsupported Texture operation");
|
||||
total_coord_count = std::min(total_coord_count, max_coords);
|
||||
}
|
||||
// 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
|
||||
total_coord_count +=
|
||||
(depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
|
||||
|
||||
return {coord_count, total_coord_count};
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
534
src/video_core/shader/decode/texture.cpp
Normal file
534
src/video_core/shader/decode/texture.cpp
Normal file
@@ -0,0 +1,534 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Register;
|
||||
using Tegra::Shader::TextureMiscMode;
|
||||
using Tegra::Shader::TextureProcessMode;
|
||||
using Tegra::Shader::TextureType;
|
||||
|
||||
static std::size_t GetCoordCount(TextureType texture_type) {
|
||||
switch (texture_type) {
|
||||
case TextureType::Texture1D:
|
||||
return 1;
|
||||
case TextureType::Texture2D:
|
||||
return 2;
|
||||
case TextureType::Texture3D:
|
||||
case TextureType::TextureCube:
|
||||
return 3;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::TEX: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
|
||||
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const TextureType texture_type{instr.tex.texture_type};
|
||||
const bool is_array = instr.tex.array != 0;
|
||||
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
|
||||
const auto process_mode = instr.tex.GetTextureProcessMode();
|
||||
WriteTexInstructionFloat(
|
||||
bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TEXS: {
|
||||
const TextureType texture_type{instr.texs.GetTextureType()};
|
||||
const bool is_array{instr.texs.IsArrayTexture()};
|
||||
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
|
||||
const auto process_mode = instr.texs.GetTextureProcessMode();
|
||||
|
||||
if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const Node4 components =
|
||||
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
|
||||
|
||||
if (instr.texs.fp32_flag) {
|
||||
WriteTexsInstructionFloat(bb, instr, components);
|
||||
} else {
|
||||
WriteTexsInstructionHalfFloat(bb, instr, components);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLD4: {
|
||||
ASSERT(instr.tld4.array == 0);
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
|
||||
"NDV is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
|
||||
"PTP is not implemented");
|
||||
|
||||
if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const auto texture_type = instr.tld4.texture_type.Value();
|
||||
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
|
||||
const bool is_array = instr.tld4.array != 0;
|
||||
WriteTexInstructionFloat(bb, instr,
|
||||
GetTld4Code(instr, texture_type, depth_compare, is_array));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLD4S: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
|
||||
const Node op_a = GetRegister(instr.gpr8);
|
||||
const Node op_b = GetRegister(instr.gpr20);
|
||||
|
||||
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
|
||||
std::vector<Node> coords;
|
||||
if (depth_compare) {
|
||||
// Note: TLD4S coordinate encoding works just like TEXS's
|
||||
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
|
||||
coords.push_back(op_a);
|
||||
coords.push_back(op_y);
|
||||
coords.push_back(op_b);
|
||||
} else {
|
||||
coords.push_back(op_a);
|
||||
coords.push_back(op_b);
|
||||
}
|
||||
const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
|
||||
|
||||
const auto& sampler =
|
||||
GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
WriteTexsInstructionFloat(bb, instr, values);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TXQ: {
|
||||
if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
// TODO: The new commits on the texture refactor, change the way samplers work.
|
||||
// Sadly, not all texture instructions specify the type of texture their sampler
|
||||
// uses. This must be fixed at a later instance.
|
||||
const auto& sampler =
|
||||
GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
|
||||
|
||||
u32 indexer = 0;
|
||||
switch (instr.txq.query_type) {
|
||||
case Tegra::Shader::TextureQueryType::Dimension: {
|
||||
for (u32 element = 0; element < 4; ++element) {
|
||||
if (!instr.txq.IsComponentEnabled(element)) {
|
||||
continue;
|
||||
}
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
|
||||
const Node value =
|
||||
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
|
||||
SetTemporal(bb, indexer++, value);
|
||||
}
|
||||
for (u32 i = 0; i < indexer; ++i) {
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
|
||||
static_cast<u32>(instr.txq.query_type.Value()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TMML: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
|
||||
"NDV is not implemented");
|
||||
|
||||
if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
auto texture_type = instr.tmml.texture_type.Value();
|
||||
const bool is_array = instr.tmml.array != 0;
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
|
||||
|
||||
std::vector<Node> coords;
|
||||
|
||||
// TODO: Add coordinates for different samplers once other texture types are implemented.
|
||||
switch (texture_type) {
|
||||
case TextureType::Texture1D:
|
||||
coords.push_back(GetRegister(instr.gpr8));
|
||||
break;
|
||||
case TextureType::Texture2D:
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
|
||||
|
||||
// Fallback to interpreting as a 2D texture for now
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
|
||||
texture_type = TextureType::Texture2D;
|
||||
}
|
||||
|
||||
for (u32 element = 0; element < 2; ++element) {
|
||||
auto params = coords;
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
|
||||
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
|
||||
SetTemporal(bb, element, value);
|
||||
}
|
||||
for (u32 element = 0; element < 2; ++element) {
|
||||
SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLDS: {
|
||||
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
|
||||
const bool is_array{instr.tlds.IsArrayTexture()};
|
||||
|
||||
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
|
||||
|
||||
if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
|
||||
}
|
||||
|
||||
return pc;
|
||||
}
|
||||
|
||||
const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
|
||||
bool is_array, bool is_shadow) {
|
||||
const auto offset = static_cast<std::size_t>(sampler.index.Value());
|
||||
|
||||
// If this sampler has already been used, return the existing mapping.
|
||||
const auto itr =
|
||||
std::find_if(used_samplers.begin(), used_samplers.end(),
|
||||
[&](const Sampler& entry) { return entry.GetOffset() == offset; });
|
||||
if (itr != used_samplers.end()) {
|
||||
ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
|
||||
itr->IsShadow() == is_shadow);
|
||||
return *itr;
|
||||
}
|
||||
|
||||
// Otherwise create a new mapping for this sampler
|
||||
const std::size_t next_index = used_samplers.size();
|
||||
const Sampler entry{offset, next_index, type, is_array, is_shadow};
|
||||
return *used_samplers.emplace(entry).first;
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
|
||||
u32 dest_elem = 0;
|
||||
for (u32 elem = 0; elem < 4; ++elem) {
|
||||
if (!instr.tex.IsComponentEnabled(elem)) {
|
||||
// Skip disabled components
|
||||
continue;
|
||||
}
|
||||
SetTemporal(bb, dest_elem++, components[elem]);
|
||||
}
|
||||
// After writing values in temporals, move them to the real registers
|
||||
for (u32 i = 0; i < dest_elem; ++i) {
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
|
||||
const Node4& components) {
|
||||
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
|
||||
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
|
||||
|
||||
u32 dest_elem = 0;
|
||||
for (u32 component = 0; component < 4; ++component) {
|
||||
if (!instr.texs.IsComponentEnabled(component))
|
||||
continue;
|
||||
SetTemporal(bb, dest_elem++, components[component]);
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < dest_elem; ++i) {
|
||||
if (i < 2) {
|
||||
// Write the first two swizzle components to gpr0 and gpr0+1
|
||||
SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
|
||||
} else {
|
||||
ASSERT(instr.texs.HasTwoDestinations());
|
||||
// Write the rest of the swizzle components to gpr28 and gpr28+1
|
||||
SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
|
||||
const Node4& components) {
|
||||
// TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
|
||||
// float instruction).
|
||||
|
||||
Node4 values;
|
||||
u32 dest_elem = 0;
|
||||
for (u32 component = 0; component < 4; ++component) {
|
||||
if (!instr.texs.IsComponentEnabled(component))
|
||||
continue;
|
||||
values[dest_elem++] = components[component];
|
||||
}
|
||||
if (dest_elem == 0)
|
||||
return;
|
||||
|
||||
std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
|
||||
|
||||
const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
|
||||
if (dest_elem <= 2) {
|
||||
SetRegister(bb, instr.gpr0, first_value);
|
||||
return;
|
||||
}
|
||||
|
||||
SetTemporal(bb, 0, first_value);
|
||||
SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
|
||||
|
||||
SetRegister(bb, instr.gpr0, GetTemporal(0));
|
||||
SetRegister(bb, instr.gpr28, GetTemporal(1));
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, std::vector<Node> coords,
|
||||
Node array, Node depth_compare, u32 bias_offset) {
|
||||
const bool is_array = array;
|
||||
const bool is_shadow = depth_compare;
|
||||
|
||||
UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
|
||||
(texture_type == TextureType::TextureCube && is_array && is_shadow),
|
||||
"This method is not supported.");
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
|
||||
|
||||
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
|
||||
process_mode == TextureProcessMode::LL ||
|
||||
process_mode == TextureProcessMode::LLA;
|
||||
|
||||
// LOD selection (either via bias or explicit textureLod) not supported in GL for
|
||||
// sampler2DArrayShadow and samplerCubeArrayShadow.
|
||||
const bool gl_lod_supported =
|
||||
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
|
||||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
|
||||
|
||||
const OperationCode read_method =
|
||||
(lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;
|
||||
|
||||
UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
|
||||
|
||||
Node bias = {};
|
||||
Node lod = {};
|
||||
if (process_mode != TextureProcessMode::None && gl_lod_supported) {
|
||||
switch (process_mode) {
|
||||
case TextureProcessMode::LZ:
|
||||
lod = Immediate(0.0f);
|
||||
break;
|
||||
case TextureProcessMode::LB:
|
||||
// If present, lod or bias are always stored in the register indexed by the gpr20
|
||||
// field with an offset depending on the usage of the other registers
|
||||
bias = GetRegister(instr.gpr20.Value() + bias_offset);
|
||||
break;
|
||||
case TextureProcessMode::LL:
|
||||
lod = GetRegister(instr.gpr20.Value() + bias_offset);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto copy_coords = coords;
|
||||
MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
|
||||
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
||||
}
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
|
||||
const bool lod_bias_enabled =
|
||||
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
|
||||
|
||||
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
||||
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
|
||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
coords.push_back(GetRegister(coord_register + i));
|
||||
}
|
||||
// 1D.DC in OpenGL the 2nd component is ignored.
|
||||
if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
|
||||
coords.push_back(Immediate(0.0f));
|
||||
}
|
||||
|
||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||
|
||||
Node dc{};
|
||||
if (depth_compare) {
|
||||
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
|
||||
// or bias are used
|
||||
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
|
||||
dc = GetRegister(depth_register);
|
||||
}
|
||||
|
||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
|
||||
const bool lod_bias_enabled =
|
||||
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
|
||||
|
||||
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
||||
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
|
||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
const u64 last_coord_register =
|
||||
(is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
|
||||
? static_cast<u64>(instr.gpr20.Value())
|
||||
: coord_register + 1;
|
||||
const u32 bias_offset = coord_count > 2 ? 1 : 0;
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
const bool last = (i == (coord_count - 1)) && (coord_count > 1);
|
||||
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
|
||||
}
|
||||
|
||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||
|
||||
Node dc{};
|
||||
if (depth_compare) {
|
||||
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
|
||||
// or bias are used
|
||||
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
|
||||
dc = GetRegister(depth_register);
|
||||
}
|
||||
|
||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
|
||||
bool is_array) {
|
||||
const std::size_t coord_count = GetCoordCount(texture_type);
|
||||
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
|
||||
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
|
||||
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
|
||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (size_t i = 0; i < coord_count; ++i)
|
||||
coords.push_back(GetRegister(coord_register + i));
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
|
||||
const std::size_t type_coord_count = GetCoordCount(texture_type);
|
||||
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
|
||||
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// if is array gpr20 is used
|
||||
const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
|
||||
|
||||
const u64 last_coord_register =
|
||||
((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
|
||||
? static_cast<u64>(instr.gpr20.Value())
|
||||
: coord_register + 1;
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < type_coord_count; ++i) {
|
||||
const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
|
||||
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
|
||||
}
|
||||
|
||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||
// When lod is used always is in gpr20
|
||||
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
|
||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||
}
|
||||
return values;
|
||||
}
|
||||
|
||||
std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
|
||||
TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
|
||||
std::size_t max_coords, std::size_t max_inputs) {
|
||||
const std::size_t coord_count = GetCoordCount(texture_type);
|
||||
|
||||
std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
|
||||
const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
|
||||
if (total_coord_count > max_coords || total_reg_count > max_inputs) {
|
||||
UNIMPLEMENTED_MSG("Unsupported Texture operation");
|
||||
total_coord_count = std::min(total_coord_count, max_coords);
|
||||
}
|
||||
// 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
|
||||
total_coord_count +=
|
||||
(depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
|
||||
|
||||
return {coord_count, total_coord_count};
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -290,7 +290,9 @@ struct MetaTexture {
|
||||
const Sampler& sampler;
|
||||
Node array{};
|
||||
Node depth_compare{};
|
||||
std::vector<Node> extras;
|
||||
Node bias{};
|
||||
Node lod{};
|
||||
Node component{};
|
||||
u32 element{};
|
||||
};
|
||||
|
||||
@@ -614,6 +616,7 @@ private:
|
||||
u32 DecodeHfma2(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeConversion(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeMemory(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeTexture(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
|
||||
|
||||
@@ -89,8 +89,6 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
|
||||
|
||||
PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
|
||||
switch (format) {
|
||||
// TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
|
||||
// gamma.
|
||||
case Tegra::RenderTargetFormat::RGBA8_SRGB:
|
||||
return PixelFormat::RGBA8_SRGB;
|
||||
case Tegra::RenderTargetFormat::RGBA8_UNORM:
|
||||
|
||||
@@ -23,28 +23,12 @@
|
||||
|
||||
#include "video_core/textures/astc.h"
|
||||
|
||||
class BitStream {
|
||||
class InputBitStream {
|
||||
public:
|
||||
explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
|
||||
explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
|
||||
: m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
|
||||
|
||||
~BitStream() = default;
|
||||
|
||||
int GetBitsWritten() const {
|
||||
return m_BitsWritten;
|
||||
}
|
||||
|
||||
void WriteBitsR(unsigned int val, unsigned int nBits) {
|
||||
for (unsigned int i = 0; i < nBits; i++) {
|
||||
WriteBit((val >> (nBits - i - 1)) & 1);
|
||||
}
|
||||
}
|
||||
|
||||
void WriteBits(unsigned int val, unsigned int nBits) {
|
||||
for (unsigned int i = 0; i < nBits; i++) {
|
||||
WriteBit((val >> i) & 1);
|
||||
}
|
||||
}
|
||||
~InputBitStream() = default;
|
||||
|
||||
int GetBitsRead() const {
|
||||
return m_BitsRead;
|
||||
@@ -70,6 +54,38 @@ public:
|
||||
return ret;
|
||||
}
|
||||
|
||||
private:
|
||||
const int m_NumBits;
|
||||
const unsigned char* m_CurByte;
|
||||
int m_NextBit = 0;
|
||||
int m_BitsRead = 0;
|
||||
|
||||
bool done = false;
|
||||
};
|
||||
|
||||
class OutputBitStream {
|
||||
public:
|
||||
explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
|
||||
: m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
|
||||
|
||||
~OutputBitStream() = default;
|
||||
|
||||
int GetBitsWritten() const {
|
||||
return m_BitsWritten;
|
||||
}
|
||||
|
||||
void WriteBitsR(unsigned int val, unsigned int nBits) {
|
||||
for (unsigned int i = 0; i < nBits; i++) {
|
||||
WriteBit((val >> (nBits - i - 1)) & 1);
|
||||
}
|
||||
}
|
||||
|
||||
void WriteBits(unsigned int val, unsigned int nBits) {
|
||||
for (unsigned int i = 0; i < nBits; i++) {
|
||||
WriteBit((val >> i) & 1);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void WriteBit(int b) {
|
||||
|
||||
@@ -238,8 +254,8 @@ public:
|
||||
// Fills result with the values that are encoded in the given
|
||||
// bitstream. We must know beforehand what the maximum possible
|
||||
// value is, and how many values we're decoding.
|
||||
static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits,
|
||||
uint32_t maxRange, uint32_t nValues) {
|
||||
static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
|
||||
InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
|
||||
// Determine encoding parameters
|
||||
IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
|
||||
|
||||
@@ -267,7 +283,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
|
||||
static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
|
||||
uint32_t nBitsPerValue) {
|
||||
// Implement the algorithm in section C.2.12
|
||||
uint32_t m[5];
|
||||
@@ -327,7 +343,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
|
||||
static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
|
||||
uint32_t nBitsPerValue) {
|
||||
// Implement the algorithm in section C.2.12
|
||||
uint32_t m[3];
|
||||
@@ -406,7 +422,7 @@ struct TexelWeightParams {
|
||||
}
|
||||
};
|
||||
|
||||
static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
|
||||
static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
|
||||
TexelWeightParams params;
|
||||
|
||||
// Read the entire block mode all at once
|
||||
@@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
|
||||
return params;
|
||||
}
|
||||
|
||||
static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
|
||||
static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
|
||||
uint32_t blockHeight) {
|
||||
// Don't actually care about the void extent, just read the bits...
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
@@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
|
||||
|
||||
// We now have enough to decode our integer sequence.
|
||||
std::vector<IntegerEncodedValue> decodedColorValues;
|
||||
BitStream colorStream(data);
|
||||
InputBitStream colorStream(data);
|
||||
IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
|
||||
|
||||
// Once we have the decoded values, we need to dequantize them to the 0-255 range
|
||||
@@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
|
||||
#undef READ_INT_VALUES
|
||||
}
|
||||
|
||||
static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
|
||||
static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
|
||||
const uint32_t blockHeight, uint32_t* outBuf) {
|
||||
BitStream strm(inBuf);
|
||||
InputBitStream strm(inBuf);
|
||||
TexelWeightParams weightParams = DecodeBlockInfo(strm);
|
||||
|
||||
// Was there an error?
|
||||
@@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
|
||||
// Define color data.
|
||||
uint8_t colorEndpointData[16];
|
||||
memset(colorEndpointData, 0, sizeof(colorEndpointData));
|
||||
BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
|
||||
OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
|
||||
|
||||
// Read extra config data...
|
||||
uint32_t baseCEM = 0;
|
||||
@@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
|
||||
memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
|
||||
|
||||
std::vector<IntegerEncodedValue> texelWeightValues;
|
||||
BitStream weightStream(texelWeightData);
|
||||
InputBitStream weightStream(texelWeightData);
|
||||
|
||||
IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
|
||||
weightParams.m_MaxWeight,
|
||||
@@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
|
||||
|
||||
namespace Tegra::Texture::ASTC {
|
||||
|
||||
std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
|
||||
std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
|
||||
uint32_t depth, uint32_t block_width, uint32_t block_height) {
|
||||
uint32_t blockIdx = 0;
|
||||
std::vector<uint8_t> outData(height * width * depth * 4);
|
||||
@@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint
|
||||
for (uint32_t j = 0; j < height; j += block_height) {
|
||||
for (uint32_t i = 0; i < width; i += block_width) {
|
||||
|
||||
uint8_t* blockPtr = data.data() + blockIdx * 16;
|
||||
const uint8_t* blockPtr = data + blockIdx * 16;
|
||||
|
||||
// Blocks can be at most 12x12
|
||||
uint32_t uncompData[144];
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
namespace Tegra::Texture::ASTC {
|
||||
|
||||
std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
|
||||
std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
|
||||
uint32_t depth, uint32_t block_width, uint32_t block_height);
|
||||
|
||||
} // namespace Tegra::Texture::ASTC
|
||||
|
||||
92
src/video_core/textures/convert.cpp
Normal file
92
src/video_core/textures/convert.cpp
Normal file
@@ -0,0 +1,92 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/textures/astc.h"
|
||||
#include "video_core/textures/convert.h"
|
||||
|
||||
namespace Tegra::Texture {
|
||||
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
|
||||
template <bool reverse>
|
||||
void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
|
||||
union S8Z24 {
|
||||
BitField<0, 24, u32> z24;
|
||||
BitField<24, 8, u32> s8;
|
||||
};
|
||||
static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
|
||||
|
||||
union Z24S8 {
|
||||
BitField<0, 8, u32> s8;
|
||||
BitField<8, 24, u32> z24;
|
||||
};
|
||||
static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
|
||||
|
||||
S8Z24 s8z24_pixel{};
|
||||
Z24S8 z24s8_pixel{};
|
||||
constexpr auto bpp{
|
||||
VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
|
||||
for (std::size_t y = 0; y < height; ++y) {
|
||||
for (std::size_t x = 0; x < width; ++x) {
|
||||
const std::size_t offset{bpp * (y * width + x)};
|
||||
if constexpr (reverse) {
|
||||
std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
|
||||
s8z24_pixel.s8.Assign(z24s8_pixel.s8);
|
||||
s8z24_pixel.z24.Assign(z24s8_pixel.z24);
|
||||
std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
|
||||
} else {
|
||||
std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
|
||||
z24s8_pixel.s8.Assign(s8z24_pixel.s8);
|
||||
z24s8_pixel.z24.Assign(s8z24_pixel.z24);
|
||||
std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
|
||||
SwapS8Z24ToZ24S8<false>(data, width, height);
|
||||
}
|
||||
|
||||
static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
|
||||
SwapS8Z24ToZ24S8<true>(data, width, height);
|
||||
}
|
||||
|
||||
void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
|
||||
bool convert_astc, bool convert_s8z24) {
|
||||
if (convert_astc && IsPixelFormatASTC(pixel_format)) {
|
||||
// Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
|
||||
u32 block_width{};
|
||||
u32 block_height{};
|
||||
std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
|
||||
const std::vector<u8> rgba8_data =
|
||||
Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
|
||||
std::copy(rgba8_data.begin(), rgba8_data.end(), data);
|
||||
|
||||
} else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
|
||||
Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height);
|
||||
}
|
||||
}
|
||||
|
||||
void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
|
||||
bool convert_astc, bool convert_s8z24) {
|
||||
if (convert_astc && IsPixelFormatASTC(pixel_format)) {
|
||||
LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
|
||||
static_cast<u32>(pixel_format));
|
||||
UNREACHABLE();
|
||||
|
||||
} else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
|
||||
Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Texture
|
||||
18
src/video_core/textures/convert.h
Normal file
18
src/video_core/textures/convert.h
Normal file
@@ -0,0 +1,18 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/surface.h"
|
||||
|
||||
namespace Tegra::Texture {
|
||||
|
||||
void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
|
||||
u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
|
||||
|
||||
void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
|
||||
u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
|
||||
|
||||
} // namespace Tegra::Texture
|
||||
@@ -103,8 +103,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
|
||||
const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
|
||||
const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
|
||||
const u32 pixel_index{out_x + pixel_base};
|
||||
data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
|
||||
data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
|
||||
data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
|
||||
data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
|
||||
std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
|
||||
}
|
||||
pixel_base += stride_x;
|
||||
@@ -154,7 +154,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
|
||||
for (u32 xb = 0; xb < blocks_on_x; xb++) {
|
||||
const u32 x_start = xb * block_x_elements;
|
||||
const u32 x_end = std::min(width, x_start + block_x_elements);
|
||||
if (fast) {
|
||||
if constexpr (fast) {
|
||||
FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
|
||||
z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
|
||||
layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
|
||||
|
||||
@@ -16,16 +16,13 @@ inline std::size_t GetGOBSize() {
|
||||
return 512;
|
||||
}
|
||||
|
||||
/**
|
||||
* Unswizzles a swizzled texture without changing its format.
|
||||
*/
|
||||
/// Unswizzles a swizzled texture without changing its format.
|
||||
void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
|
||||
u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
|
||||
u32 block_height = TICEntry::DefaultBlockHeight,
|
||||
u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
|
||||
/**
|
||||
* Unswizzles a swizzled texture without changing its format.
|
||||
*/
|
||||
|
||||
/// Unswizzles a swizzled texture without changing its format.
|
||||
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
|
||||
u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
|
||||
u32 block_height = TICEntry::DefaultBlockHeight,
|
||||
@@ -37,15 +34,11 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
|
||||
u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
|
||||
bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
|
||||
|
||||
/**
|
||||
* Decodes an unswizzled texture into a A8R8G8B8 texture.
|
||||
*/
|
||||
/// Decodes an unswizzled texture into a A8R8G8B8 texture.
|
||||
std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
|
||||
u32 height);
|
||||
|
||||
/**
|
||||
* This function calculates the correct size of a texture depending if it's tiled or not.
|
||||
*/
|
||||
/// This function calculates the correct size of a texture depending if it's tiled or not.
|
||||
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
|
||||
u32 block_height, u32 block_depth);
|
||||
|
||||
@@ -53,6 +46,7 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
|
||||
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
|
||||
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
|
||||
u32 block_height);
|
||||
|
||||
/// Copies a tiled subrectangle into a linear surface.
|
||||
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
|
||||
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
|
||||
|
||||
@@ -4,8 +4,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <future>
|
||||
#include <string>
|
||||
|
||||
namespace WebService {
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/web_result.h"
|
||||
#include "core/settings.h"
|
||||
#include "web_service/web_backend.h"
|
||||
|
||||
namespace WebService {
|
||||
|
||||
@@ -56,6 +56,8 @@ constexpr char NX_SHIM_INJECT_SCRIPT[] = R"(
|
||||
window.nx.endApplet = function() {
|
||||
applet_done = true;
|
||||
};
|
||||
|
||||
window.onkeypress = function(e) { if (e.keyCode === 13) { applet_done = true; } };
|
||||
)";
|
||||
|
||||
QString GetNXShimInjectionScript() {
|
||||
|
||||
@@ -20,10 +20,7 @@
|
||||
EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {}
|
||||
|
||||
void EmuThread::run() {
|
||||
if (!Settings::values.use_multi_core) {
|
||||
// Single core mode must acquire OpenGL context for entire emulation session
|
||||
render_window->MakeCurrent();
|
||||
}
|
||||
render_window->MakeCurrent();
|
||||
|
||||
MicroProfileOnThreadCreate("EmuThread");
|
||||
|
||||
@@ -38,6 +35,11 @@ void EmuThread::run() {
|
||||
|
||||
emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
|
||||
|
||||
if (Settings::values.use_asynchronous_gpu_emulation) {
|
||||
// Release OpenGL context for the GPU thread
|
||||
render_window->DoneCurrent();
|
||||
}
|
||||
|
||||
// holds whether the cpu was running during the last iteration,
|
||||
// so that the DebugModeLeft signal can be emitted before the
|
||||
// next execution step
|
||||
|
||||
@@ -53,8 +53,8 @@ void CompatDB::Submit() {
|
||||
case CompatDBPage::Final:
|
||||
back();
|
||||
LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId());
|
||||
Core::Telemetry().AddField(Telemetry::FieldType::UserFeedback, "Compatibility",
|
||||
compatibility->checkedId());
|
||||
Core::System::GetInstance().TelemetrySession().AddField(
|
||||
Telemetry::FieldType::UserFeedback, "Compatibility", compatibility->checkedId());
|
||||
|
||||
button(NextButton)->setEnabled(false);
|
||||
button(NextButton)->setText(tr("Submitting"));
|
||||
|
||||
@@ -374,6 +374,8 @@ void Config::ReadValues() {
|
||||
qt_config->value("use_disk_shader_cache", false).toBool();
|
||||
Settings::values.use_accurate_gpu_emulation =
|
||||
qt_config->value("use_accurate_gpu_emulation", false).toBool();
|
||||
Settings::values.use_asynchronous_gpu_emulation =
|
||||
qt_config->value("use_asynchronous_gpu_emulation", false).toBool();
|
||||
|
||||
Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();
|
||||
Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat();
|
||||
@@ -633,6 +635,8 @@ void Config::SaveValues() {
|
||||
qt_config->setValue("frame_limit", Settings::values.frame_limit);
|
||||
qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache);
|
||||
qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation);
|
||||
qt_config->setValue("use_asynchronous_gpu_emulation",
|
||||
Settings::values.use_asynchronous_gpu_emulation);
|
||||
|
||||
// Cast to double because Qt's written float values are not human-readable
|
||||
qt_config->setValue("bg_red", (double)Settings::values.bg_red);
|
||||
|
||||
@@ -75,6 +75,8 @@ void ConfigureGraphics::setConfiguration() {
|
||||
ui->frame_limit->setValue(Settings::values.frame_limit);
|
||||
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
|
||||
ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
|
||||
ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
|
||||
ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
|
||||
UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
|
||||
Settings::values.bg_blue));
|
||||
}
|
||||
@@ -86,6 +88,8 @@ void ConfigureGraphics::applyConfiguration() {
|
||||
Settings::values.frame_limit = ui->frame_limit->value();
|
||||
Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
|
||||
Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
|
||||
Settings::values.use_asynchronous_gpu_emulation =
|
||||
ui->use_asynchronous_gpu_emulation->isChecked();
|
||||
Settings::values.bg_red = static_cast<float>(bg_color.redF());
|
||||
Settings::values.bg_green = static_cast<float>(bg_color.greenF());
|
||||
Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
|
||||
|
||||
@@ -63,6 +63,13 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="use_asynchronous_gpu_emulation">
|
||||
<property name="text">
|
||||
<string>Use asynchronous GPU emulation</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout">
|
||||
<item>
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user