Compare commits
47 Commits
| SHA1 |
|---|
| a8d4927e29 |
| ecccfe0337 |
| 3ea48e8ebe |
| 5b7ec71fb7 |
| 99da6362c4 |
| bd983414f6 |
| 4327f430f1 |
| a8fc5d6edd |
| fcc3aa0bbf |
| 8490e7746a |
| f0c4ac9abd |
| 0829ef97ca |
| f0bfb24c61 |
| 83ba3515ec |
| cd542d5aac |
| c425a1a857 |
| 8beca060d1 |
| 86b55cb6df |
| 8135f4bfce |
| c440ecfafe |
| 054e39647c |
| e25c464c02 |
| 18fe910957 |
| b12ab4d805 |
| cc94a6d101 |
| afb8af9853 |
| e60d4d70bc |
| 48d9d66dc5 |
| 444231a83d |
| c1accfefde |
| 27e5efd265 |
| f5ec165e8c |
| edd668047c |
| 1ddcd0e6f0 |
| a6a73d8892 |
| 1d98027a0e |
| 2374471a1e |
| e543320129 |
| 504aafedd2 |
| e36e7ae74e |
| 259e52ccb2 |
| 889c646ac0 |
| d62b0a9e29 |
| f09d1dffd1 |
| 0d1d755086 |
| 42b75e8be8 |
| 6a6fabea58 |
.gitmodules (vendored) +3
@@ -37,3 +37,6 @@
 [submodule "discord-rpc"]
 	path = externals/discord-rpc
 	url = https://github.com/discordapp/discord-rpc.git
+[submodule "Vulkan-Headers"]
+	path = externals/Vulkan-Headers
+	url = https://github.com/KhronosGroup/Vulkan-Headers.git
CMakeLists.txt
@@ -23,6 +23,8 @@ option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OFF)
 option(ENABLE_CUBEB "Enables the cubeb audio backend" ON)
 
+option(ENABLE_VULKAN "Enables Vulkan backend" ON)
+
 option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)
 
 if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
externals/Vulkan-Headers (vendored submodule) +1
Submodule externals/Vulkan-Headers added at 7f02d9bb81
src/audio_core/audio_out.cpp
@@ -26,14 +26,15 @@ static Stream::Format ChannelsToStreamFormat(u32 num_channels) {
     return {};
 }
 
-StreamPtr AudioOut::OpenStream(u32 sample_rate, u32 num_channels, std::string&& name,
+StreamPtr AudioOut::OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate,
+                               u32 num_channels, std::string&& name,
                                Stream::ReleaseCallback&& release_callback) {
     if (!sink) {
        sink = CreateSinkFromID(Settings::values.sink_id, Settings::values.audio_device_id);
     }
 
     return std::make_shared<Stream>(
-        sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
+        core_timing, sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
         sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name));
 }
 
src/audio_core/audio_out.h
@@ -13,6 +13,10 @@
 #include "audio_core/stream.h"
 #include "common/common_types.h"
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace AudioCore {
 
 /**
@@ -21,8 +25,8 @@ namespace AudioCore {
 class AudioOut {
 public:
     /// Opens a new audio stream
-    StreamPtr OpenStream(u32 sample_rate, u32 num_channels, std::string&& name,
-                         Stream::ReleaseCallback&& release_callback);
+    StreamPtr OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, u32 num_channels,
+                         std::string&& name, Stream::ReleaseCallback&& release_callback);
 
     /// Returns a vector of recently released buffers specified by tag for the specified stream
     std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count);
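The pattern in these audio hunks repeats throughout the rest of the diff: the global `CoreTiming` namespace is gone, so every caller now threads a `Core::Timing::CoreTiming&` through explicitly. A minimal sketch of a call site under the new signature, assuming a `system` object that exposes the `CoreTiming()` accessor added to `Core::System` later in this diff (the stream parameters are arbitrary example values):

```cpp
// Hypothetical call site; `system` stands in for whatever Core::System
// reference the surrounding service code actually has in scope.
AudioCore::AudioOut audio_out;
AudioCore::StreamPtr stream = audio_out.OpenStream(
    system.CoreTiming(), 48000, 2, "ExampleStream",
    [] { /* guest buffer was released; signal the waiting event here */ });
```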
src/audio_core/audio_renderer.cpp
@@ -8,6 +8,7 @@
 #include "audio_core/codec.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/memory.h"
 
@@ -71,14 +72,14 @@ private:
     EffectOutStatus out_status{};
     EffectInStatus info{};
 };
-AudioRenderer::AudioRenderer(AudioRendererParameter params,
+AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
                              Kernel::SharedPtr<Kernel::WritableEvent> buffer_event)
     : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
       effects(params.effect_count) {
 
     audio_out = std::make_unique<AudioCore::AudioOut>();
-    stream = audio_out->OpenStream(STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, "AudioRenderer",
-                                   [=]() { buffer_event->Signal(); });
+    stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
+                                   "AudioRenderer", [=]() { buffer_event->Signal(); });
     audio_out->StartStream(stream);
 
     QueueMixedBuffer(0);
src/audio_core/audio_renderer.h
@@ -14,6 +14,10 @@
 #include "common/swap.h"
 #include "core/hle/kernel/object.h"
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace Kernel {
 class WritableEvent;
 }
@@ -208,7 +212,7 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size");
 
 class AudioRenderer {
 public:
-    AudioRenderer(AudioRendererParameter params,
+    AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
                   Kernel::SharedPtr<Kernel::WritableEvent> buffer_event);
     ~AudioRenderer();
 
src/audio_core/stream.cpp
@@ -32,12 +32,12 @@ u32 Stream::GetNumChannels() const {
     return {};
 }
 
-Stream::Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback,
-               SinkStream& sink_stream, std::string&& name_)
+Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
+               ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_)
     : sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)},
-      sink_stream{sink_stream}, name{std::move(name_)} {
+      sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} {
 
-    release_event = CoreTiming::RegisterEvent(
+    release_event = core_timing.RegisterEvent(
         name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); });
 }
 
@@ -57,7 +57,7 @@ Stream::State Stream::GetState() const {
 
 s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const {
     const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
-    return CoreTiming::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate);
+    return Core::Timing::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate);
 }
 
 static void VolumeAdjustSamples(std::vector<s16>& samples) {
@@ -99,7 +99,7 @@ void Stream::PlayNextBuffer() {
 
     sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
 
-    CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
+    core_timing.ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
 }
 
 void Stream::ReleaseActiveBuffer() {
src/audio_core/stream.h
@@ -13,9 +13,10 @@
 #include "audio_core/buffer.h"
 #include "common/common_types.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
+class CoreTiming;
 struct EventType;
-}
+} // namespace Core::Timing
 
 namespace AudioCore {
 
@@ -42,8 +43,8 @@ public:
     /// Callback function type, used to change guest state on a buffer being released
     using ReleaseCallback = std::function<void()>;
 
-    Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback,
-           SinkStream& sink_stream, std::string&& name_);
+    Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
+           ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_);
 
     /// Plays the audio stream
     void Play();
@@ -91,16 +92,17 @@ private:
     /// Gets the number of core cycles when the specified buffer will be released
     s64 GetBufferReleaseCycles(const Buffer& buffer) const;
 
-    u32 sample_rate;                        ///< Sample rate of the stream
-    Format format;                          ///< Format of the stream
-    ReleaseCallback release_callback;       ///< Buffer release callback for the stream
-    State state{State::Stopped};            ///< Playback state of the stream
-    CoreTiming::EventType* release_event{}; ///< Core timing release event for the stream
-    BufferPtr active_buffer;                ///< Actively playing buffer in the stream
-    std::queue<BufferPtr> queued_buffers;   ///< Buffers queued to be played in the stream
-    std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream
-    SinkStream& sink_stream;                ///< Output sink for the stream
-    std::string name;                       ///< Name of the stream, must be unique
+    u32 sample_rate;                              ///< Sample rate of the stream
+    Format format;                                ///< Format of the stream
+    ReleaseCallback release_callback;             ///< Buffer release callback for the stream
+    State state{State::Stopped};                  ///< Playback state of the stream
+    Core::Timing::EventType* release_event{};     ///< Core timing release event for the stream
+    BufferPtr active_buffer;                      ///< Actively playing buffer in the stream
+    std::queue<BufferPtr> queued_buffers;         ///< Buffers queued to be played in the stream
+    std::queue<BufferPtr> released_buffers;       ///< Buffers recently released from the stream
+    SinkStream& sink_stream;                      ///< Output sink for the stream
+    Core::Timing::CoreTiming& core_timing;        ///< Core timing instance.
+    std::string name;                             ///< Name of the stream, must be unique
 };
 
 using StreamPtr = std::shared_ptr<Stream>;
src/common/CMakeLists.txt
@@ -113,6 +113,8 @@ add_library(common STATIC
     threadsafe_queue.h
     timer.cpp
     timer.h
+    uint128.cpp
+    uint128.h
     vector_math.h
     web_result.h
 )
src/common/logging/backend.cpp
@@ -232,6 +232,7 @@ void DebuggerBackend::Write(const Entry& entry) {
     CLS(Render) \
     SUB(Render, Software) \
     SUB(Render, OpenGL) \
+    SUB(Render, Vulkan) \
     CLS(Audio) \
     SUB(Audio, DSP) \
     SUB(Audio, Sink) \
src/common/logging/log.h
@@ -112,6 +112,7 @@ enum class Class : ClassType {
     Render,          ///< Emulator video output and hardware acceleration
     Render_Software, ///< Software renderer backend
     Render_OpenGL,   ///< OpenGL backend
+    Render_Vulkan,   ///< Vulkan backend
     Audio,           ///< Audio emulation
     Audio_DSP,       ///< The HLE implementation of the DSP
     Audio_Sink,      ///< Emulator audio output backend
src/common/threadsafe_queue.h
@@ -7,17 +7,16 @@
 // a simple lockless thread-safe,
 // single reader, single writer queue
 
 #include <algorithm>
 #include <atomic>
 #include <cstddef>
 #include <mutex>
-#include "common/common_types.h"
+#include <utility>
 
 namespace Common {
-template <typename T, bool NeedSize = true>
+template <typename T>
 class SPSCQueue {
 public:
-    SPSCQueue() : size(0) {
+    SPSCQueue() {
         write_ptr = read_ptr = new ElementPtr();
     }
     ~SPSCQueue() {
@@ -25,13 +24,12 @@ public:
         delete read_ptr;
     }
 
-    u32 Size() const {
-        static_assert(NeedSize, "using Size() on FifoQueue without NeedSize");
+    std::size_t Size() const {
         return size.load();
     }
 
     bool Empty() const {
-        return !read_ptr->next.load();
+        return Size() == 0;
     }
 
     T& Front() const {
@@ -47,13 +45,13 @@ public:
         ElementPtr* new_ptr = new ElementPtr();
         write_ptr->next.store(new_ptr, std::memory_order_release);
         write_ptr = new_ptr;
-        if (NeedSize)
-            size++;
+
+        ++size;
     }
 
     void Pop() {
-        if (NeedSize)
-            size--;
+        --size;
 
         ElementPtr* tmpptr = read_ptr;
         // advance the read pointer
         read_ptr = tmpptr->next.load();
@@ -66,8 +64,7 @@ public:
         if (Empty())
             return false;
 
-        if (NeedSize)
-            size--;
+        --size;
 
         ElementPtr* tmpptr = read_ptr;
         read_ptr = tmpptr->next.load(std::memory_order_acquire);
@@ -89,7 +86,7 @@ private:
     // and a pointer to the next ElementPtr
     class ElementPtr {
     public:
-        ElementPtr() : next(nullptr) {}
+        ElementPtr() {}
        ~ElementPtr() {
            ElementPtr* next_ptr = next.load();
 
@@ -98,21 +95,21 @@ private:
         }
 
         T current;
-        std::atomic<ElementPtr*> next;
+        std::atomic<ElementPtr*> next{nullptr};
     };
 
     ElementPtr* write_ptr;
     ElementPtr* read_ptr;
-    std::atomic<u32> size;
+    std::atomic_size_t size{0};
 };
 
 // a simple thread-safe,
 // single reader, multiple writer queue
 
-template <typename T, bool NeedSize = true>
+template <typename T>
 class MPSCQueue {
 public:
-    u32 Size() const {
+    std::size_t Size() const {
         return spsc_queue.Size();
     }
 
@@ -144,7 +141,7 @@ public:
     }
 
 private:
-    SPSCQueue<T, NeedSize> spsc_queue;
+    SPSCQueue<T> spsc_queue;
     std::mutex write_lock;
 };
 } // namespace Common
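Since `Empty()` is now defined as `Size() == 0` and the `NeedSize` flag is gone, the size counter is always maintained. A small self-contained usage sketch of the queue after this change:

```cpp
#include <cstdio>

#include "common/threadsafe_queue.h"

int main() {
    Common::SPSCQueue<int> queue; // no NeedSize template argument anymore
    queue.Push(1);
    queue.Push(2);

    int value{};
    while (queue.Pop(value)) { // Pop(T&) returns false once the queue is empty
        std::printf("popped %d, %zu left\n", value, queue.Size());
    }
    return queue.Empty() ? 0 : 1;
}
```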
src/common/uint128.cpp (new file) +41
@@ -0,0 +1,41 @@
+#ifdef _MSC_VER
+#include <intrin.h>
+
+#pragma intrinsic(_umul128)
+#endif
+#include <cstring>
+#include "common/uint128.h"
+
+namespace Common {
+
+u128 Multiply64Into128(u64 a, u64 b) {
+    u128 result;
+#ifdef _MSC_VER
+    result[0] = _umul128(a, b, &result[1]);
+#else
+    unsigned __int128 tmp = a;
+    tmp *= b;
+    std::memcpy(&result, &tmp, sizeof(u128));
+#endif
+    return result;
+}
+
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
+    u64 remainder = dividend[0] % divisor;
+    u64 accum = dividend[0] / divisor;
+    if (dividend[1] == 0)
+        return {accum, remainder};
+    // We ignore dividend[1] / divisor as that overflows
+    const u64 first_segment = (dividend[1] % divisor) << 32;
+    accum += (first_segment / divisor) << 32;
+    const u64 second_segment = (first_segment % divisor) << 32;
+    accum += (second_segment / divisor);
+    remainder += second_segment % divisor;
+    if (remainder >= divisor) {
+        accum++;
+        remainder -= divisor;
+    }
+    return {accum, remainder};
+}
+
+} // namespace Common
src/common/uint128.h (new file) +14
@@ -0,0 +1,14 @@
+
+#include <utility>
+#include "common/common_types.h"
+
+namespace Common {
+
+// This function multiplies 2 u64 values and produces a u128 value;
+u128 Multiply64Into128(u64 a, u64 b);
+
+// This function divides a u128 by a u32 value and produces two u64 values:
+// the result of division and the remainder
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
+
+} // namespace Common
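The segment arithmetic in `Divide128On32` is easy to misread, so a quick round-trip check helps: multiplying two values and then dividing the 128-bit product by one of them must return the other exactly. A self-contained sketch (`u128` is the `std::array<u64, 2>` alias from `common/common_types.h`, low word first):

```cpp
#include <cassert>

#include "common/uint128.h"

int main() {
    // Chosen so the product exceeds 64 bits and exercises the high word.
    const u64 a = 123456789ULL << 20;
    const u32 b = 1u << 20;
    const u128 product = Common::Multiply64Into128(a, b);
    assert(product[1] != 0); // the high 64 bits are actually in use

    // Dividing the product by b must give back a with no remainder.
    const auto [quotient, remainder] = Common::Divide128On32(product, b);
    assert(quotient == a && remainder == 0);
    return 0;
}
```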
src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -12,6 +12,7 @@
 #include "core/core.h"
 #include "core/core_cpu.h"
 #include "core/core_timing.h"
+#include "core/core_timing_util.h"
 #include "core/gdbstub/gdbstub.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/svc.h"
@@ -112,14 +113,14 @@ public:
         // Always execute at least one tick.
         amortized_ticks = std::max<u64>(amortized_ticks, 1);
 
-        CoreTiming::AddTicks(amortized_ticks);
+        parent.core_timing.AddTicks(amortized_ticks);
         num_interpreted_instructions = 0;
     }
     u64 GetTicksRemaining() override {
-        return std::max(CoreTiming::GetDowncount(), 0);
+        return std::max(parent.core_timing.GetDowncount(), 0);
     }
     u64 GetCNTPCT() override {
-        return CoreTiming::GetTicks();
+        return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
     }
 
     ARM_Dynarmic& parent;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
     config.tpidr_el0 = &cb->tpidr_el0;
     config.dczid_el0 = 4;
     config.ctr_el0 = 0x8444c004;
-    config.cntfrq_el0 = 19200000; // Value from fusee.
+    config.cntfrq_el0 = Timing::CNTFREQ;
 
     // Unpredictable instructions
     config.define_unpredictable_behaviour = true;
@@ -172,8 +173,10 @@ void ARM_Dynarmic::Step() {
     cb->InterpreterFallback(jit->GetPC(), 1);
 }
 
-ARM_Dynarmic::ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
-    : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), core_index{core_index},
+ARM_Dynarmic::ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
+                           std::size_t core_index)
+    : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{core_timing},
+      core_index{core_index}, core_timing{core_timing},
       exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {
     ThreadContext ctx{};
     inner_unicorn.SaveContext(ctx);
src/core/arm/dynarmic/arm_dynarmic.h
@@ -16,6 +16,10 @@ namespace Memory {
 struct PageTable;
 }
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace Core {
 
 class ARM_Dynarmic_Callbacks;
@@ -23,7 +27,8 @@ class DynarmicExclusiveMonitor;
 
 class ARM_Dynarmic final : public ARM_Interface {
 public:
-    ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
+    ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
+                 std::size_t core_index);
     ~ARM_Dynarmic();
 
     void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
@@ -62,6 +67,7 @@ private:
     ARM_Unicorn inner_unicorn;
 
     std::size_t core_index;
+    Timing::CoreTiming& core_timing;
     DynarmicExclusiveMonitor& exclusive_monitor;
 
     Memory::PageTable* current_page_table = nullptr;
src/core/arm/unicorn/arm_unicorn.cpp
@@ -72,7 +72,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size,
     return {};
 }
 
-ARM_Unicorn::ARM_Unicorn() {
+ARM_Unicorn::ARM_Unicorn(Timing::CoreTiming& core_timing) : core_timing{core_timing} {
     CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));
 
     auto fpv = 3 << 20;
@@ -177,7 +177,7 @@ void ARM_Unicorn::Run() {
     if (GDBStub::IsServerEnabled()) {
         ExecuteInstructions(std::max(4000000, 0));
     } else {
-        ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0));
+        ExecuteInstructions(std::max(core_timing.GetDowncount(), 0));
     }
 }
 
@@ -190,7 +190,7 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
 void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
     MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
     CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
-    CoreTiming::AddTicks(num_instructions);
+    core_timing.AddTicks(num_instructions);
     if (GDBStub::IsServerEnabled()) {
         if (last_bkpt_hit) {
             uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
src/core/arm/unicorn/arm_unicorn.h
@@ -9,12 +9,17 @@
 #include "core/arm/arm_interface.h"
 #include "core/gdbstub/gdbstub.h"
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace Core {
 
 class ARM_Unicorn final : public ARM_Interface {
 public:
-    ARM_Unicorn();
+    explicit ARM_Unicorn(Timing::CoreTiming& core_timing);
     ~ARM_Unicorn();
 
     void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                           Kernel::VMAPermission perms) override;
     void UnmapMemory(VAddr address, std::size_t size) override;
@@ -43,6 +48,7 @@ public:
 
 private:
     uc_engine* uc{};
+    Timing::CoreTiming& core_timing;
     GDBStub::BreakpointAddress last_bkpt{};
     bool last_bkpt_hit;
 };
src/core/core.cpp
@@ -94,8 +94,8 @@ struct System::Impl {
     ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) {
         LOG_DEBUG(HW_Memory, "initialized OK");
 
-        CoreTiming::Init();
-        kernel.Initialize();
+        core_timing.Initialize();
+        kernel.Initialize(core_timing);
 
         const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
             std::chrono::system_clock::now().time_since_epoch());
@@ -120,7 +120,7 @@ struct System::Impl {
         telemetry_session = std::make_unique<Core::TelemetrySession>();
         service_manager = std::make_shared<Service::SM::ServiceManager>();
 
-        Service::Init(service_manager, *virtual_filesystem);
+        Service::Init(service_manager, system, *virtual_filesystem);
         GDBStub::Init();
 
         renderer = VideoCore::CreateRenderer(emu_window, system);
@@ -205,7 +205,7 @@ struct System::Impl {
 
         // Shutdown kernel and core timing
         kernel.Shutdown();
-        CoreTiming::Shutdown();
+        core_timing.Shutdown();
 
         // Close app loader
         app_loader.reset();
@@ -232,9 +232,10 @@ struct System::Impl {
     }
 
     PerfStatsResults GetAndResetPerfStats() {
-        return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs());
+        return perf_stats.GetAndResetStats(core_timing.GetGlobalTimeUs());
     }
 
+    Timing::CoreTiming core_timing;
     Kernel::KernelCore kernel;
     /// RealVfsFilesystem instance
     FileSys::VirtualFilesystem virtual_filesystem;
@@ -396,6 +397,14 @@ const Kernel::KernelCore& System::Kernel() const {
     return impl->kernel;
 }
 
+Timing::CoreTiming& System::CoreTiming() {
+    return impl->core_timing;
+}
+
+const Timing::CoreTiming& System::CoreTiming() const {
+    return impl->core_timing;
+}
+
 Core::PerfStats& System::GetPerfStats() {
     return impl->perf_stats;
 }
src/core/core.h
@@ -47,6 +47,10 @@ namespace VideoCore {
 class RendererBase;
 } // namespace VideoCore
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace Core {
 
 class ARM_Interface;
@@ -205,6 +209,12 @@ public:
     /// Provides a constant pointer to the current process.
     const Kernel::Process* CurrentProcess() const;
 
+    /// Provides a reference to the core timing instance.
+    Timing::CoreTiming& CoreTiming();
+
+    /// Provides a constant reference to the core timing instance.
+    const Timing::CoreTiming& CoreTiming() const;
+
     /// Provides a reference to the kernel instance.
     Kernel::KernelCore& Kernel();
 
src/core/core_cpu.cpp
@@ -49,17 +49,18 @@ bool CpuBarrier::Rendezvous() {
     return false;
 }
 
-Cpu::Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index)
-    : cpu_barrier{cpu_barrier}, core_index{core_index} {
+Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
+         CpuBarrier& cpu_barrier, std::size_t core_index)
+    : cpu_barrier{cpu_barrier}, core_timing{core_timing}, core_index{core_index} {
     if (Settings::values.use_cpu_jit) {
 #ifdef ARCHITECTURE_x86_64
-        arm_interface = std::make_unique<ARM_Dynarmic>(exclusive_monitor, core_index);
+        arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
 #else
         arm_interface = std::make_unique<ARM_Unicorn>();
         LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
 #endif
     } else {
-        arm_interface = std::make_unique<ARM_Unicorn>();
+        arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
     }
 
     scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface);
@@ -93,14 +94,14 @@ void Cpu::RunLoop(bool tight_loop) {
 
         if (IsMainCore()) {
             // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.
-            CoreTiming::Idle();
-            CoreTiming::Advance();
+            core_timing.Idle();
+            core_timing.Advance();
         }
 
         PrepareReschedule();
     } else {
         if (IsMainCore()) {
-            CoreTiming::Advance();
+            core_timing.Advance();
         }
 
         if (tight_loop) {
src/core/core_cpu.h
@@ -15,6 +15,10 @@ namespace Kernel {
 class Scheduler;
 }
 
+namespace Core::Timing {
+class CoreTiming;
+}
+
 namespace Core {
 
 class ARM_Interface;
@@ -41,7 +45,8 @@ private:
 
 class Cpu {
 public:
-    Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index);
+    Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
+        CpuBarrier& cpu_barrier, std::size_t core_index);
     ~Cpu();
 
     void RunLoop(bool tight_loop = true);
@@ -82,6 +87,7 @@ private:
     std::unique_ptr<ARM_Interface> arm_interface;
     CpuBarrier& cpu_barrier;
     std::unique_ptr<Kernel::Scheduler> scheduler;
+    Timing::CoreTiming& core_timing;
 
     std::atomic<bool> reschedule_pending = false;
     std::size_t core_index;
src/core/core_timing.cpp
@@ -8,71 +8,60 @@
 #include <mutex>
 #include <string>
 #include <tuple>
 #include <unordered_map>
 #include <vector>
 
 #include "common/assert.h"
 #include "common/thread.h"
 #include "common/threadsafe_queue.h"
 #include "core/core_timing_util.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
 
-static s64 global_timer;
-static int slice_length;
-static int downcount;
+constexpr int MAX_SLICE_LENGTH = 20000;
 
-struct EventType {
-    TimedCallback callback;
-    const std::string* name;
-};
 
-struct Event {
+struct CoreTiming::Event {
     s64 time;
     u64 fifo_order;
     u64 userdata;
     const EventType* type;
+
+    // Sort by time, unless the times are the same, in which case sort by
+    // the order added to the queue
+    friend bool operator>(const Event& left, const Event& right) {
+        return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
+    }
+
+    friend bool operator<(const Event& left, const Event& right) {
+        return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
+    }
 };
 
-// Sort by time, unless the times are the same, in which case sort by the order added to the queue
-static bool operator>(const Event& left, const Event& right) {
-    return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
-}
+CoreTiming::CoreTiming() = default;
+CoreTiming::~CoreTiming() = default;
 
-static bool operator<(const Event& left, const Event& right) {
-    return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
-}
+void CoreTiming::Initialize() {
+    downcount = MAX_SLICE_LENGTH;
+    slice_length = MAX_SLICE_LENGTH;
+    global_timer = 0;
+    idled_cycles = 0;
+
+    // The time between CoreTiming being initialized and the first call to Advance() is considered
+    // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
+    // executing the first cycle of each slice to prepare the slice length and downcount for
+    // that slice.
+    is_global_timer_sane = true;
+
+    event_fifo_id = 0;
+
+    const auto empty_timed_callback = [](u64, s64) {};
+    ev_lost = RegisterEvent("_lost_event", empty_timed_callback);
+}
 
-// unordered_map stores each element separately as a linked list node so pointers to elements
-// remain stable regardless of rehashes/resizing.
-static std::unordered_map<std::string, EventType> event_types;
+void CoreTiming::Shutdown() {
+    MoveEvents();
+    ClearPendingEvents();
+    UnregisterAllEvents();
+}
 
-// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
-// We don't use std::priority_queue because we need to be able to serialize, unserialize and
-// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't accomodated
-// by the standard adaptor class.
-static std::vector<Event> event_queue;
-static u64 event_fifo_id;
-// the queue for storing the events from other threads threadsafe until they will be added
-// to the event_queue by the emu thread
-static Common::MPSCQueue<Event, false> ts_queue;
-
-// the queue for unscheduling the events from other threads threadsafe
-static Common::MPSCQueue<std::pair<const EventType*, u64>, false> unschedule_queue;
-
-constexpr int MAX_SLICE_LENGTH = 20000;
-
-static s64 idled_cycles;
-
-// Are we in a function that has been called from Advance()
-// If events are sheduled from a function that gets called from Advance(),
-// don't change slice_length and downcount.
-static bool is_global_timer_sane;
-
-static EventType* ev_lost = nullptr;
-
-static void EmptyTimedCallback(u64 userdata, s64 cyclesLate) {}
-
-EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
+EventType* CoreTiming::RegisterEvent(const std::string& name, TimedCallback callback) {
     // check for existing type with same name.
     // we want event type names to remain unique so that we can use them for serialization.
     ASSERT_MSG(event_types.find(name) == event_types.end(),
@@ -86,71 +75,31 @@ EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
     return event_type;
 }
 
-void UnregisterAllEvents() {
+void CoreTiming::UnregisterAllEvents() {
     ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
     event_types.clear();
 }
 
-void Init() {
-    downcount = MAX_SLICE_LENGTH;
-    slice_length = MAX_SLICE_LENGTH;
-    global_timer = 0;
-    idled_cycles = 0;
-
-    // The time between CoreTiming being intialized and the first call to Advance() is considered
-    // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
-    // executing the first cycle of each slice to prepare the slice length and downcount for
-    // that slice.
-    is_global_timer_sane = true;
-
-    event_fifo_id = 0;
-    ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback);
-}
-
-void Shutdown() {
-    MoveEvents();
-    ClearPendingEvents();
-    UnregisterAllEvents();
-}
-
-// This should only be called from the CPU thread. If you are calling
-// it from any other thread, you are doing something evil
-u64 GetTicks() {
-    u64 ticks = static_cast<u64>(global_timer);
-    if (!is_global_timer_sane) {
-        ticks += slice_length - downcount;
-    }
-    return ticks;
-}
-
-void AddTicks(u64 ticks) {
-    downcount -= static_cast<int>(ticks);
-}
-
-u64 GetIdleTicks() {
-    return static_cast<u64>(idled_cycles);
-}
-
-void ClearPendingEvents() {
-    event_queue.clear();
-}
-
-void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
+void CoreTiming::ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
     ASSERT(event_type != nullptr);
-    s64 timeout = GetTicks() + cycles_into_future;
+    const s64 timeout = GetTicks() + cycles_into_future;
 
     // If this event needs to be scheduled before the next advance(), force one early
-    if (!is_global_timer_sane)
+    if (!is_global_timer_sane) {
         ForceExceptionCheck(cycles_into_future);
+    }
 
     event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
     std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
 }
 
-void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
+void CoreTiming::ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
+                                         u64 userdata) {
     ts_queue.Push(Event{global_timer + cycles_into_future, 0, userdata, event_type});
 }
 
-void UnscheduleEvent(const EventType* event_type, u64 userdata) {
-    auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
+void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) {
+    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type == event_type && e.userdata == userdata;
     });
@@ -161,13 +110,33 @@ void UnscheduleEvent(const EventType* event_type, u64 userdata) {
     }
 }
 
-void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
+void CoreTiming::UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
     unschedule_queue.Push(std::make_pair(event_type, userdata));
 }
 
-void RemoveEvent(const EventType* event_type) {
-    auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
-                              [&](const Event& e) { return e.type == event_type; });
+u64 CoreTiming::GetTicks() const {
+    u64 ticks = static_cast<u64>(global_timer);
+    if (!is_global_timer_sane) {
+        ticks += slice_length - downcount;
+    }
+    return ticks;
+}
+
+u64 CoreTiming::GetIdleTicks() const {
+    return static_cast<u64>(idled_cycles);
+}
+
+void CoreTiming::AddTicks(u64 ticks) {
+    downcount -= static_cast<int>(ticks);
+}
+
+void CoreTiming::ClearPendingEvents() {
+    event_queue.clear();
+}
+
+void CoreTiming::RemoveEvent(const EventType* event_type) {
+    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
+                                    [&](const Event& e) { return e.type == event_type; });
 
     // Removing random items breaks the invariant so we have to re-establish it.
     if (itr != event_queue.end()) {
@@ -176,22 +145,24 @@ void RemoveEvent(const EventType* event_type) {
     }
 }
 
-void RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
+void CoreTiming::RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
     MoveEvents();
     RemoveEvent(event_type);
 }
 
-void ForceExceptionCheck(s64 cycles) {
+void CoreTiming::ForceExceptionCheck(s64 cycles) {
     cycles = std::max<s64>(0, cycles);
-    if (downcount > cycles) {
-        // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
-        // here. Account for cycles already executed by adjusting the g.slice_length
-        slice_length -= downcount - static_cast<int>(cycles);
-        downcount = static_cast<int>(cycles);
+    if (downcount <= cycles) {
+        return;
     }
+
+    // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
+    // here. Account for cycles already executed by adjusting the g.slice_length
+    slice_length -= downcount - static_cast<int>(cycles);
+    downcount = static_cast<int>(cycles);
 }
 
-void MoveEvents() {
+void CoreTiming::MoveEvents() {
     for (Event ev; ts_queue.Pop(ev);) {
         ev.fifo_order = event_fifo_id++;
         event_queue.emplace_back(std::move(ev));
@@ -199,13 +170,13 @@ void MoveEvents() {
     }
 }
 
-void Advance() {
+void CoreTiming::Advance() {
     MoveEvents();
     for (std::pair<const EventType*, u64> ev; unschedule_queue.Pop(ev);) {
         UnscheduleEvent(ev.first, ev.second);
     }
 
-    int cycles_executed = slice_length - downcount;
+    const int cycles_executed = slice_length - downcount;
     global_timer += cycles_executed;
     slice_length = MAX_SLICE_LENGTH;
 
@@ -229,17 +200,17 @@ void Advance() {
     downcount = slice_length;
 }
 
-void Idle() {
+void CoreTiming::Idle() {
     idled_cycles += downcount;
     downcount = 0;
 }
 
-std::chrono::microseconds GetGlobalTimeUs() {
+std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
     return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
 }
 
-int GetDowncount() {
+int CoreTiming::GetDowncount() const {
     return downcount;
 }
 
-} // namespace CoreTiming
+} // namespace Core::Timing
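One detail worth noting in the rewrite above: the event queue stays an explicit min-heap over a `std::vector` (via `std::push_heap`/`std::pop_heap` with `std::greater<>`) rather than a `std::priority_queue`, so that `RemoveEvent()` can erase arbitrary elements and re-establish the invariant. A standalone sketch of that pattern with plain integers:

```cpp
#include <algorithm>
#include <cstdio>
#include <functional>
#include <vector>

int main() {
    std::vector<int> heap{5, 1, 4};
    // std::greater<> turns the default max-heap into a min-heap.
    std::make_heap(heap.begin(), heap.end(), std::greater<>());

    heap.push_back(2);
    std::push_heap(heap.begin(), heap.end(), std::greater<>());

    // Erase an arbitrary element (the operation RemoveEvent() needs),
    // then re-heapify, mirroring the re-establishment in the code above.
    heap.erase(std::remove(heap.begin(), heap.end(), 4), heap.end());
    std::make_heap(heap.begin(), heap.end(), std::greater<>());

    while (!heap.empty()) {
        std::pop_heap(heap.begin(), heap.end(), std::greater<>()); // min moves to back
        std::printf("%d\n", heap.back()); // prints 1, 2, 5 in order
        heap.pop_back();
    }
    return 0;
}
```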
src/core/core_timing.h
@@ -4,6 +4,27 @@
 
 #pragma once
 
+#include <chrono>
+#include <functional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "common/common_types.h"
+#include "common/threadsafe_queue.h"
+
+namespace Core::Timing {
+
+/// A callback that may be scheduled for a particular core timing event.
+using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
+
+/// Contains the characteristics of a particular event.
+struct EventType {
+    /// The event's callback function.
+    TimedCallback callback;
+    /// A pointer to the name of the event.
+    const std::string* name;
+};
+
 /**
  * This is a system to schedule events into the emulated machine's future. Time is measured
  * in main CPU clock cycles.
@@ -16,80 +37,120 @@
  * inside callback:
  *   ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever")
  */
+class CoreTiming {
+public:
+    CoreTiming();
+    ~CoreTiming();
 
-#include <chrono>
-#include <functional>
-#include <string>
-#include "common/common_types.h"
+    CoreTiming(const CoreTiming&) = delete;
+    CoreTiming(CoreTiming&&) = delete;
 
-namespace CoreTiming {
+    CoreTiming& operator=(const CoreTiming&) = delete;
+    CoreTiming& operator=(CoreTiming&&) = delete;
 
-struct EventType;
+    /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
+    /// required to end slice - 1 and start slice 0 before the first cycle of code is executed.
+    void Initialize();
 
-using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
+    /// Tears down all timing related functionality.
+    void Shutdown();
 
-/**
- * CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
- * required to end slice -1 and start slice 0 before the first cycle of code is executed.
- */
-void Init();
-void Shutdown();
+    /// Registers a core timing event with the given name and callback.
+    ///
+    /// @param name     The name of the core timing event to register.
+    /// @param callback The callback to execute for the event.
+    ///
+    /// @returns An EventType instance representing the registered event.
+    ///
+    /// @pre The name of the event being registered must be unique among all
+    ///      registered events.
+    ///
+    EventType* RegisterEvent(const std::string& name, TimedCallback callback);
 
-/**
- * This should only be called from the emu thread, if you are calling it any other thread, you are
- * doing something evil
- */
-u64 GetTicks();
-u64 GetIdleTicks();
-void AddTicks(u64 ticks);
+    /// Unregisters all registered events thus far.
+    void UnregisterAllEvents();
 
-/**
- * Returns the event_type identifier. if name is not unique, it will assert.
- */
-EventType* RegisterEvent(const std::string& name, TimedCallback callback);
-void UnregisterAllEvents();
+    /// After the first Advance, the slice lengths and the downcount will be reduced whenever an
+    /// event is scheduled earlier than the current values.
+    ///
+    /// Scheduling from a callback will not update the downcount until the Advance() completes.
+    void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
 
-/**
- * After the first Advance, the slice lengths and the downcount will be reduced whenever an event
- * is scheduled earlier than the current values.
- * Scheduling from a callback will not update the downcount until the Advance() completes.
- */
-void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
+    /// This is to be called when outside of hle threads, such as the graphics thread, wants to
+    /// schedule things to be executed on the main thread.
+    ///
+    /// @note This doesn't change slice_length and thus events scheduled by this might be
+    ///       called with a delay of up to MAX_SLICE_LENGTH
+    void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
+                                 u64 userdata = 0);
 
-/**
- * This is to be called when outside of hle threads, such as the graphics thread, wants to
- * schedule things to be executed on the main thread.
- * Not that this doesn't change slice_length and thus events scheduled by this might be called
- * with a delay of up to MAX_SLICE_LENGTH
- */
-void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata);
+    void UnscheduleEvent(const EventType* event_type, u64 userdata);
+    void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
 
-void UnscheduleEvent(const EventType* event_type, u64 userdata);
-void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
+    /// We only permit one event of each type in the queue at a time.
+    void RemoveEvent(const EventType* event_type);
+    void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
 
-/// We only permit one event of each type in the queue at a time.
-void RemoveEvent(const EventType* event_type);
-void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
+    void ForceExceptionCheck(s64 cycles);
 
-/** Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
- * the previous timing slice and begins the next one, you must Advance from the previous
- * slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
- * Advance() is required to initialize the slice length before the first cycle of emulated
- * instructions is executed.
- */
-void Advance();
-void MoveEvents();
+    /// This should only be called from the emu thread, if you are calling it any other thread,
+    /// you are doing something evil
+    u64 GetTicks() const;
 
-/// Pretend that the main CPU has executed enough cycles to reach the next event.
-void Idle();
+    u64 GetIdleTicks() const;
 
-/// Clear all pending events. This should ONLY be done on exit.
-void ClearPendingEvents();
+    void AddTicks(u64 ticks);
 
-void ForceExceptionCheck(s64 cycles);
+    /// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
+    /// the previous timing slice and begins the next one, you must Advance from the previous
+    /// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
+    /// Advance() is required to initialize the slice length before the first cycle of emulated
+    /// instructions is executed.
+    void Advance();
 
-std::chrono::microseconds GetGlobalTimeUs();
+    /// Pretend that the main CPU has executed enough cycles to reach the next event.
+    void Idle();
 
-int GetDowncount();
+    std::chrono::microseconds GetGlobalTimeUs() const;
 
-} // namespace CoreTiming
+    int GetDowncount() const;
+
+private:
+    struct Event;
+
+    /// Clear all pending events. This should ONLY be done on exit.
+    void ClearPendingEvents();
+    void MoveEvents();
+
+    s64 global_timer = 0;
+    s64 idled_cycles = 0;
+    int slice_length = 0;
+    int downcount = 0;
+
+    // Are we in a function that has been called from Advance()
+    // If events are scheduled from a function that gets called from Advance(),
+    // don't change slice_length and downcount.
+    bool is_global_timer_sane = false;
+
+    // The queue is a min-heap using std::make_heap/push_heap/pop_heap.
+    // We don't use std::priority_queue because we need to be able to serialize, unserialize and
+    // erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
+    // accomodated by the standard adaptor class.
+    std::vector<Event> event_queue;
+    u64 event_fifo_id = 0;
+
+    // Stores each element separately as a linked list node so pointers to elements
+    // remain stable regardless of rehashes/resizing.
+    std::unordered_map<std::string, EventType> event_types;
+
+    // The queue for storing the events from other threads threadsafe until they will be added
+    // to the event_queue by the emu thread
+    Common::MPSCQueue<Event> ts_queue;
+
+    // The queue for unscheduling the events from other threads threadsafe
+    Common::MPSCQueue<std::pair<const EventType*, u64>> unschedule_queue;
+
+    EventType* ev_lost = nullptr;
+};
+
+} // namespace Core::Timing
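Taken together, the class replaces what used to be free functions plus file-scope globals. A minimal usage sketch against the header above (the cycle count is an arbitrary example value):

```cpp
#include "core/core_timing.h"

void Example() {
    Core::Timing::CoreTiming core_timing;
    core_timing.Initialize();

    // TimedCallback receives the userdata it was scheduled with and how many
    // cycles late the event actually fired.
    Core::Timing::EventType* ev =
        core_timing.RegisterEvent("ExampleEvent", [](u64 userdata, int cycles_late) {
            // react to the event here
        });

    core_timing.ScheduleEvent(19200, ev); // fires 19200 cycles into the future
    core_timing.Advance(); // dispatcher loops call this at the start of each slice

    core_timing.Shutdown();
}
```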
src/core/core_timing_util.cpp
@@ -7,8 +7,9 @@
 #include <cinttypes>
 #include <limits>
 #include "common/logging/log.h"
+#include "common/uint128.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
 
 constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE;
 
@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
     return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
 }
 
-} // namespace CoreTiming
+u64 CpuCyclesToClockCycles(u64 ticks) {
+    const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
+    return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
+}
+
+} // namespace Core::Timing
src/core/core_timing_util.h
@@ -6,11 +6,12 @@
 
 #include "common/common_types.h"
 
-namespace CoreTiming {
+namespace Core::Timing {
 
 // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
 // The exact value used is of course unverified.
 constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
+constexpr u64 CNTFREQ = 19200000;           // Value from fusee.
 
 inline s64 msToCycles(int ms) {
     // since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
     return cycles * 1000 / BASE_CLOCK_RATE;
 }
 
-} // namespace CoreTiming
+u64 CpuCyclesToClockCycles(u64 ticks);
+
+} // namespace Core::Timing
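As a worked check of the constants above: `CpuCyclesToClockCycles` scales CPU ticks by CNTFREQ / BASE_CLOCK_RATE = 19200000 / 1019215872, roughly 0.0188, and the 128-bit intermediate exists because `ticks * CNTFREQ` overflows 64 bits once ticks passes about 9.6e11, which is only around 15 minutes of emulated time at 1.02 GHz. A sketch:

```cpp
#include <cstdio>

#include "core/core_timing_util.h"

int main() {
    // One emulated second of CPU ticks...
    const u64 ticks = Core::Timing::BASE_CLOCK_RATE;
    // ...converts to exactly one second of counter ticks, i.e. CNTFREQ.
    const u64 clock = Core::Timing::CpuCyclesToClockCycles(ticks);
    std::printf("%llu\n", static_cast<unsigned long long>(clock)); // 19200000
    return 0;
}
```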
src/core/cpu_core_manager.cpp
@@ -27,7 +27,8 @@ void CpuCoreManager::Initialize(System& system) {
     exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
 
     for (std::size_t index = 0; index < cores.size(); ++index) {
-        cores[index] = std::make_unique<Cpu>(*exclusive_monitor, *barrier, index);
+        cores[index] =
+            std::make_unique<Cpu>(system.CoreTiming(), *exclusive_monitor, *barrier, index);
     }
 
     // Create threads for CPU cores 1-3, and build thread_to_cpu map
src/core/hle/kernel/kernel.cpp
@@ -86,11 +86,11 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_late) {
 }
 
 struct KernelCore::Impl {
-    void Initialize(KernelCore& kernel) {
+    void Initialize(KernelCore& kernel, Core::Timing::CoreTiming& core_timing) {
         Shutdown();
 
         InitializeSystemResourceLimit(kernel);
-        InitializeThreads();
+        InitializeThreads(core_timing);
     }
 
     void Shutdown() {
@@ -122,9 +122,9 @@ struct KernelCore::Impl {
         ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess());
     }
 
-    void InitializeThreads() {
+    void InitializeThreads(Core::Timing::CoreTiming& core_timing) {
         thread_wakeup_event_type =
-            CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
+            core_timing.RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
     }
 
     std::atomic<u32> next_object_id{0};
@@ -137,7 +137,7 @@ struct KernelCore::Impl {
 
     SharedPtr<ResourceLimit> system_resource_limit;
 
-    CoreTiming::EventType* thread_wakeup_event_type = nullptr;
+    Core::Timing::EventType* thread_wakeup_event_type = nullptr;
     // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future,
     // allowing us to simply use a pool index or similar.
     Kernel::HandleTable thread_wakeup_callback_handle_table;
@@ -152,8 +152,8 @@ KernelCore::~KernelCore() {
     Shutdown();
 }
 
-void KernelCore::Initialize() {
-    impl->Initialize(*this);
+void KernelCore::Initialize(Core::Timing::CoreTiming& core_timing) {
+    impl->Initialize(*this, core_timing);
 }
 
 void KernelCore::Shutdown() {
@@ -213,7 +213,7 @@ u64 KernelCore::CreateNewProcessID() {
     return impl->next_process_id++;
 }
 
-CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
+Core::Timing::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
     return impl->thread_wakeup_event_type;
 }
 
src/core/hle/kernel/kernel.h
@@ -11,9 +11,10 @@
 template <typename T>
 class ResultVal;
 
-namespace CoreTiming {
+namespace Core::Timing {
+class CoreTiming;
 struct EventType;
-}
+} // namespace Core::Timing
 
 namespace Kernel {
 
@@ -39,7 +40,11 @@ public:
     KernelCore& operator=(KernelCore&&) = delete;
 
     /// Resets the kernel to a clean slate for use.
-    void Initialize();
+    ///
+    /// @param core_timing CoreTiming instance used to create any necessary
+    ///                    kernel-specific callback events.
+    ///
+    void Initialize(Core::Timing::CoreTiming& core_timing);
 
     /// Clears all resources in use by the kernel instance.
     void Shutdown();
@@ -89,7 +94,7 @@ private:
     u64 CreateNewThreadID();
 
     /// Retrieves the event type used for thread wakeup callbacks.
-    CoreTiming::EventType* ThreadWakeupCallbackEventType() const;
+    Core::Timing::EventType* ThreadWakeupCallbackEventType() const;
 
     /// Provides a reference to the thread wakeup callback handle table.
     Kernel::HandleTable& ThreadWakeupCallbackHandleTable();
 
src/core/hle/kernel/scheduler.cpp
@@ -111,7 +111,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 
 void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
     const u64 prev_switch_ticks = last_context_switch_time;
-    const u64 most_recent_switch_ticks = CoreTiming::GetTicks();
+    const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
     const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
 
     if (thread != nullptr) {
src/core/hle/kernel/svc.cpp
@@ -918,6 +918,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id) {
     }
 
     const auto& system = Core::System::GetInstance();
+    const auto& core_timing = system.CoreTiming();
     const auto& scheduler = system.CurrentScheduler();
     const auto* const current_thread = scheduler.GetCurrentThread();
     const bool same_thread = current_thread == thread;
@@ -927,9 +928,9 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id) {
     if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
         const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
 
-        out_ticks = thread_ticks + (CoreTiming::GetTicks() - prev_ctx_ticks);
+        out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks);
     } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
-        out_ticks = CoreTiming::GetTicks() - prev_ctx_ticks;
+        out_ticks = core_timing.GetTicks() - prev_ctx_ticks;
     }
 
     *result = out_ticks;
@@ -1546,10 +1547,11 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to_wake) {
 static u64 GetSystemTick() {
     LOG_TRACE(Kernel_SVC, "called");
 
-    const u64 result{CoreTiming::GetTicks()};
+    auto& core_timing = Core::System::GetInstance().CoreTiming();
+    const u64 result{core_timing.GetTicks()};
 
     // Advance time to defeat dumb games that busy-wait for the frame to end.
-    CoreTiming::AddTicks(400);
+    core_timing.AddTicks(400);
 
     return result;
 }
@@ -43,7 +43,8 @@ Thread::~Thread() = default;

void Thread::Stop() {
    // Cancel any outstanding wakeup events for this thread
    CoreTiming::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle);
    Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
                                                             callback_handle);
    kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
    callback_handle = 0;

@@ -85,12 +86,14 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {

    // This function might be called from any thread so we have to be cautious and use the
    // thread-safe version of ScheduleEvent.
    CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds),
                                        kernel.ThreadWakeupCallbackEventType(), callback_handle);
    Core::System::GetInstance().CoreTiming().ScheduleEventThreadsafe(
        Core::Timing::nsToCycles(nanoseconds), kernel.ThreadWakeupCallbackEventType(),
        callback_handle);
}

void Thread::CancelWakeupTimer() {
    CoreTiming::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(), callback_handle);
    Core::System::GetInstance().CoreTiming().UnscheduleEventThreadsafe(
        kernel.ThreadWakeupCallbackEventType(), callback_handle);
}

static std::optional<s32> GetNextProcessorId(u64 mask) {
@@ -189,6 +192,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
        return ResultCode(-1);
    }

    auto& system = Core::System::GetInstance();
    SharedPtr<Thread> thread(new Thread(kernel));

    thread->thread_id = kernel.CreateNewThreadID();
@@ -197,7 +201,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
    thread->stack_top = stack_top;
    thread->tpidr_el0 = 0;
    thread->nominal_priority = thread->current_priority = priority;
    thread->last_running_ticks = CoreTiming::GetTicks();
    thread->last_running_ticks = system.CoreTiming().GetTicks();
    thread->processor_id = processor_id;
    thread->ideal_core = processor_id;
    thread->affinity_mask = 1ULL << processor_id;
@@ -208,7 +212,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
    thread->name = std::move(name);
    thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
    thread->owner_process = &owner_process;
    thread->scheduler = &Core::System::GetInstance().Scheduler(processor_id);
    thread->scheduler = &system.Scheduler(processor_id);
    thread->scheduler->AddThread(thread, priority);
    thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);

@@ -257,7 +261,7 @@ void Thread::SetStatus(ThreadStatus new_status) {
    }

    if (status == ThreadStatus::Running) {
        last_running_ticks = CoreTiming::GetTicks();
        last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
    }

    status = new_status;
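`WakeAfterDelay` also swaps the conversion helper from `CoreTiming::nsToCycles` to `Core::Timing::nsToCycles`. A sketch of the thread-safe scheduling shape in isolation (function and parameter names are placeholders):

```cpp
// Sketch: schedule a wakeup from an arbitrary host thread. The threadsafe
// variant is required because WakeAfterDelay may run off the CPU thread;
// the plain ScheduleEvent assumes it is called from the emulation thread.
void ScheduleWakeup(Core::Timing::CoreTiming& core_timing,
                    Core::Timing::EventType* wakeup_event, u64 callback_handle,
                    s64 nanoseconds) {
    core_timing.ScheduleEventThreadsafe(Core::Timing::nsToCycles(nanoseconds), wakeup_event,
                                        callback_handle);
}
```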
@@ -68,12 +68,12 @@ public:
        RegisterHandlers(functions);

        // This is the event handle used to check if the audio buffer was released
        auto& kernel = Core::System::GetInstance().Kernel();
        buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
                                                              "IAudioOutBufferReleased");
        auto& system = Core::System::GetInstance();
        buffer_event = Kernel::WritableEvent::CreateEventPair(
            system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased");

        stream = audio_core.OpenStream(audio_params.sample_rate, audio_params.channel_count,
                                       std::move(unique_name),
        stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
                                       audio_params.channel_count, std::move(unique_name),
                                       [=]() { buffer_event.writable->Signal(); });
    }

@@ -42,10 +42,11 @@ public:
        // clang-format on
        RegisterHandlers(functions);

        auto& kernel = Core::System::GetInstance().Kernel();
        system_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
                                                              "IAudioRenderer:SystemEvent");
        renderer = std::make_unique<AudioCore::AudioRenderer>(audren_params, system_event.writable);
        auto& system = Core::System::GetInstance();
        system_event = Kernel::WritableEvent::CreateEventPair(
            system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
        renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
                                                              system_event.writable);
    }

private:
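`AudioCore::AudioOut::OpenStream` now takes the timing instance as its first parameter so the stream can schedule its buffer-release events against it. A caller-side sketch (the sample rate, channel count, and stream name are illustrative values):

```cpp
// Sketch: open a stereo 48 kHz stream whose release callback signals a
// kernel event, mirroring the IAudioOut constructor above.
auto& system = Core::System::GetInstance();
stream = audio_core.OpenStream(system.CoreTiming(), 48000, 2, "IAudioOut:Example",
                               [=]() { buffer_event.writable->Signal(); });
```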
@@ -7,6 +7,10 @@
#include "common/common_types.h"
#include "common/swap.h"

namespace Core::Timing {
class CoreTiming;
}

namespace Service::HID {
class ControllerBase {
public:
@@ -20,7 +24,8 @@ public:
    virtual void OnRelease() = 0;

    // When the controller is requesting an update for the shared memory
    virtual void OnUpdate(u8* data, std::size_t size) = 0;
    virtual void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                          std::size_t size) = 0;

    // Called when input devices should be loaded
    virtual void OnLoadInputDevices() = 0;
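Each concrete controller below implements this widened interface. The shape of an override, shown on a hypothetical controller (`CommonHeader` and the `shared_memory` layout mirror the stubbed controller further down; the class itself is illustrative):

```cpp
// Hypothetical controller illustrating the new override: the timestamp comes
// from the injected core_timing rather than a global tick counter.
class Controller_Example final : public ControllerBase {
public:
    void OnInit() override {}
    void OnRelease() override {}
    void OnLoadInputDevices() override {}

    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                  std::size_t size) override {
        shared_memory.header.timestamp = core_timing.GetTicks();
        shared_memory.header.total_entry_count = 17;
        std::memcpy(data, &shared_memory, sizeof(shared_memory));
    }

private:
    struct SharedMemory {
        CommonHeader header{};
    } shared_memory{};
};
```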
@@ -21,8 +21,9 @@ void Controller_DebugPad::OnInit() {}

void Controller_DebugPad::OnRelease() {}

void Controller_DebugPad::OnUpdate(u8* data, std::size_t size) {
    shared_memory.header.timestamp = CoreTiming::GetTicks();
void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                                   std::size_t size) {
    shared_memory.header.timestamp = core_timing.GetTicks();
    shared_memory.header.total_entry_count = 17;

    if (!IsControllerActivated()) {

@@ -26,7 +26,7 @@ public:
    void OnRelease() override;

    // When the controller is requesting an update for the shared memory
    void OnUpdate(u8* data, std::size_t size) override;
    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;

    // Called when input devices should be loaded
    void OnLoadInputDevices() override;

@@ -17,8 +17,9 @@ void Controller_Gesture::OnInit() {}

void Controller_Gesture::OnRelease() {}

void Controller_Gesture::OnUpdate(u8* data, std::size_t size) {
    shared_memory.header.timestamp = CoreTiming::GetTicks();
void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                                  std::size_t size) {
    shared_memory.header.timestamp = core_timing.GetTicks();
    shared_memory.header.total_entry_count = 17;

    if (!IsControllerActivated()) {

@@ -22,7 +22,7 @@ public:
    void OnRelease() override;

    // When the controller is requesting an update for the shared memory
    void OnUpdate(u8* data, size_t size) override;
    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, size_t size) override;

    // Called when input devices should be loaded
    void OnLoadInputDevices() override;

@@ -19,8 +19,9 @@ void Controller_Keyboard::OnInit() {}

void Controller_Keyboard::OnRelease() {}

void Controller_Keyboard::OnUpdate(u8* data, std::size_t size) {
    shared_memory.header.timestamp = CoreTiming::GetTicks();
void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                                   std::size_t size) {
    shared_memory.header.timestamp = core_timing.GetTicks();
    shared_memory.header.total_entry_count = 17;

    if (!IsControllerActivated()) {

@@ -25,7 +25,7 @@ public:
    void OnRelease() override;

    // When the controller is requesting an update for the shared memory
    void OnUpdate(u8* data, std::size_t size) override;
    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;

    // Called when input devices should be loaded
    void OnLoadInputDevices() override;

@@ -17,8 +17,9 @@ Controller_Mouse::~Controller_Mouse() = default;
void Controller_Mouse::OnInit() {}
void Controller_Mouse::OnRelease() {}

void Controller_Mouse::OnUpdate(u8* data, std::size_t size) {
    shared_memory.header.timestamp = CoreTiming::GetTicks();
void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                                std::size_t size) {
    shared_memory.header.timestamp = core_timing.GetTicks();
    shared_memory.header.total_entry_count = 17;

    if (!IsControllerActivated()) {

@@ -24,7 +24,7 @@ public:
    void OnRelease() override;

    // When the controller is requesting an update for the shared memory
    void OnUpdate(u8* data, std::size_t size) override;
    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;

    // Called when input devices should be loaded
    void OnLoadInputDevices() override;

@@ -288,7 +288,8 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
    rstick_entry.y = static_cast<s32>(stick_r_y_f * HID_JOYSTICK_MAX);
}

void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) {
void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                               std::size_t data_len) {
    if (!IsControllerActivated())
        return;
    for (std::size_t i = 0; i < shared_memory_entries.size(); i++) {
@@ -308,7 +309,7 @@ void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) {
        const auto& last_entry =
            main_controller->npad[main_controller->common.last_entry_index];

        main_controller->common.timestamp = CoreTiming::GetTicks();
        main_controller->common.timestamp = core_timing.GetTicks();
        main_controller->common.last_entry_index =
            (main_controller->common.last_entry_index + 1) % 17;

@@ -30,7 +30,7 @@ public:
    void OnRelease() override;

    // When the controller is requesting an update for the shared memory
    void OnUpdate(u8* data, std::size_t size) override;
    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;

    // Called when input devices should be loaded
    void OnLoadInputDevices() override;

@@ -16,13 +16,14 @@ void Controller_Stubbed::OnInit() {}

void Controller_Stubbed::OnRelease() {}

void Controller_Stubbed::OnUpdate(u8* data, std::size_t size) {
void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                                  std::size_t size) {
    if (!smart_update) {
        return;
    }

    CommonHeader header{};
    header.timestamp = CoreTiming::GetTicks();
    header.timestamp = core_timing.GetTicks();
    header.total_entry_count = 17;
    header.entry_count = 0;
    header.last_entry_index = 0;

@@ -20,7 +20,7 @@ public:
    void OnRelease() override;

    // When the controller is requesting an update for the shared memory
    void OnUpdate(u8* data, std::size_t size) override;
    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;

    // Called when input devices should be loaded
    void OnLoadInputDevices() override;

@@ -20,8 +20,9 @@ void Controller_Touchscreen::OnInit() {}

void Controller_Touchscreen::OnRelease() {}

void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) {
    shared_memory.header.timestamp = CoreTiming::GetTicks();
void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                                      std::size_t size) {
    shared_memory.header.timestamp = core_timing.GetTicks();
    shared_memory.header.total_entry_count = 17;

    if (!IsControllerActivated()) {
@@ -48,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) {
    touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
    touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
    touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
    const u64 tick = CoreTiming::GetTicks();
    const u64 tick = core_timing.GetTicks();
    touch_entry.delta_time = tick - last_touch;
    last_touch = tick;
    touch_entry.finger = Settings::values.touchscreen.finger;

@@ -24,7 +24,7 @@ public:
    void OnRelease() override;

    // When the controller is requesting an update for the shared memory
    void OnUpdate(u8* data, std::size_t size) override;
    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;

    // Called when input devices should be loaded
    void OnLoadInputDevices() override;

@@ -17,9 +17,10 @@ void Controller_XPad::OnInit() {}

void Controller_XPad::OnRelease() {}

void Controller_XPad::OnUpdate(u8* data, std::size_t size) {
void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                               std::size_t size) {
    for (auto& xpad_entry : shared_memory.shared_memory_entries) {
        xpad_entry.header.timestamp = CoreTiming::GetTicks();
        xpad_entry.header.timestamp = core_timing.GetTicks();
        xpad_entry.header.total_entry_count = 17;

        if (!IsControllerActivated()) {

@@ -22,7 +22,7 @@ public:
    void OnRelease() override;

    // When the controller is requesting an update for the shared memory
    void OnUpdate(u8* data, std::size_t size) override;
    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;

    // Called when input devices should be loaded
    void OnLoadInputDevices() override;
@@ -36,9 +36,9 @@ namespace Service::HID {

// Updating period for each HID device.
// TODO(ogniK): Find actual polling rate of hid
constexpr u64 pad_update_ticks = CoreTiming::BASE_CLOCK_RATE / 66;
constexpr u64 accelerometer_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100;
constexpr u64 gyroscope_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100;
constexpr u64 pad_update_ticks = Core::Timing::BASE_CLOCK_RATE / 66;
constexpr u64 accelerometer_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
constexpr u64 gyroscope_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;

IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
@@ -73,14 +73,15 @@ IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
    GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000);

    // Register update callbacks
    auto& core_timing = Core::System::GetInstance().CoreTiming();
    pad_update_event =
        CoreTiming::RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) {
        core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) {
            UpdateControllers(userdata, cycles_late);
        });

    // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)

    CoreTiming::ScheduleEvent(pad_update_ticks, pad_update_event);
    core_timing.ScheduleEvent(pad_update_ticks, pad_update_event);

    ReloadInputDevices();
}
@@ -94,7 +95,7 @@ void IAppletResource::DeactivateController(HidController controller) {
}

IAppletResource ::~IAppletResource() {
    CoreTiming::UnscheduleEvent(pad_update_event, 0);
    Core::System::GetInstance().CoreTiming().UnscheduleEvent(pad_update_event, 0);
}

void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
@@ -106,15 +107,17 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
}

void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) {
    auto& core_timing = Core::System::GetInstance().CoreTiming();

    const bool should_reload = Settings::values.is_device_reload_pending.exchange(false);
    for (const auto& controller : controllers) {
        if (should_reload) {
            controller->OnLoadInputDevices();
        }
        controller->OnUpdate(shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
        controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
    }

    CoreTiming::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
    core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
}

class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> {
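`UpdateControllers` uses the standard self-rescheduling idiom: subtracting `cycles_late` from the period means a callback that fired late schedules its successor correspondingly sooner, so the effective polling rate stays at `pad_update_ticks`. Distilled into a sketch, with the device polling elided:

```cpp
// Sketch of the drift-compensating periodic callback: if this invocation
// ran N cycles late, the next one is scheduled N cycles earlier, keeping
// the long-run average period equal to period_ticks.
void PeriodicUpdate(Core::Timing::CoreTiming& core_timing, Core::Timing::EventType* event,
                    u64 period_ticks, int cycles_late) {
    // ... poll input devices and write the HID shared memory here ...
    core_timing.ScheduleEvent(period_ticks - cycles_late, event);
}
```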
@@ -7,7 +7,7 @@
#include "controllers/controller_base.h"
#include "core/hle/service/service.h"

namespace CoreTiming {
namespace Core::Timing {
struct EventType;
}

@@ -66,7 +66,7 @@ private:

    Kernel::SharedPtr<Kernel::SharedMemory> shared_mem;

    CoreTiming::EventType* pad_update_event;
    Core::Timing::EventType* pad_update_event;

    std::array<std::unique_ptr<ControllerBase>, static_cast<size_t>(HidController::MaxControllers)>
        controllers{};

@@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) {

    IPC::ResponseBuilder rb{ctx, 5};
    rb.Push(RESULT_SUCCESS);
    rb.PushRaw<u64>(CoreTiming::GetTicks());
    rb.PushRaw<u64>(Core::System::GetInstance().CoreTiming().GetTicks());
    rb.PushRaw<u32>(0);
}

@@ -25,9 +25,9 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
                        u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
                        const MathUtil::Rectangle<int>& crop_rect) {
    VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
    LOG_WARNING(Service,
                "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
                addr, offset, width, height, stride, format);
    LOG_TRACE(Service,
              "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
              addr, offset, width, height, stride, format);

    using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
    const Tegra::FramebufferConfig framebuffer{

@@ -5,6 +5,7 @@
#include <cstring>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
@@ -184,7 +185,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o

    IoctlGetGpuTime params{};
    std::memcpy(&params, input.data(), input.size());
    params.gpu_time = CoreTiming::cyclesToNs(CoreTiming::GetTicks());
    params.gpu_time = Core::Timing::cyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks());
    std::memcpy(output.data(), &params, output.size());
    return 0;
}
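`GetGpuTime` depends on `Core::Timing::cyclesToNs`, and the time services further down use `cyclesToMs`; both are plain rescalings of the tick counter by the emulated clock rate. A sketch of the arithmetic, assuming the Switch base clock yuzu uses elsewhere (1019215872 Hz; the real helpers live in core_timing_util):

```cpp
constexpr u64 BASE_CLOCK_RATE = 1019215872; // emulated CPU clock, ~1.02 GHz

// Equivalent in spirit to Core::Timing::cyclesToNs / cyclesToMs; the naive
// multiply-first form shown here can overflow for very large tick counts.
constexpr s64 CyclesToNs(s64 cycles) {
    return cycles * 1000000000 / BASE_CLOCK_RATE;
}
constexpr u64 CyclesToMs(u64 cycles) {
    return cycles * 1000 / BASE_CLOCK_RATE;
}
```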
@@ -13,10 +13,6 @@
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/writable_event.h"

namespace CoreTiming {
struct EventType;
}

namespace Service::NVFlinger {

struct IGBPBuffer {

@@ -25,21 +25,21 @@
namespace Service::NVFlinger {

constexpr std::size_t SCREEN_REFRESH_RATE = 60;
constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);

NVFlinger::NVFlinger() {
NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
    // Schedule the screen composition events
    composition_event =
        CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
        core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
            Compose();
            CoreTiming::ScheduleEvent(frame_ticks - cycles_late, composition_event);
            this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event);
        });

    CoreTiming::ScheduleEvent(frame_ticks, composition_event);
    core_timing.ScheduleEvent(frame_ticks, composition_event);
}

NVFlinger::~NVFlinger() {
    CoreTiming::UnscheduleEvent(composition_event, 0);
    core_timing.UnscheduleEvent(composition_event, 0);
}

void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
@@ -14,9 +14,10 @@
#include "common/common_types.h"
#include "core/hle/kernel/object.h"

namespace CoreTiming {
namespace Core::Timing {
class CoreTiming;
struct EventType;
}
} // namespace Core::Timing

namespace Kernel {
class ReadableEvent;
@@ -52,7 +53,7 @@ struct Display {

class NVFlinger final {
public:
    NVFlinger();
    explicit NVFlinger(Core::Timing::CoreTiming& core_timing);
    ~NVFlinger();

    /// Sets the NVDrv module instance to use to send buffers to the GPU.
@@ -115,8 +116,11 @@ private:
    /// layers.
    u32 next_buffer_queue_id = 1;

    /// CoreTiming event that handles screen composition.
    CoreTiming::EventType* composition_event;
    /// Event that handles screen composition.
    Core::Timing::EventType* composition_event;

    /// Core timing instance for registering/unregistering the composition event.
    Core::Timing::CoreTiming& core_timing;
};

} // namespace Service::NVFlinger

@@ -194,10 +194,11 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
// Module interface

/// Initialize ServiceManager
void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs) {
void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
          FileSys::VfsFilesystem& vfs) {
    // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it
    // here and pass it into the respective InstallInterfaces functions.
    auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>();
    auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming());

    SM::ServiceManager::InstallInterfaces(sm);

@@ -14,6 +14,14 @@
////////////////////////////////////////////////////////////////////////////////////////////////////
// Namespace Service

namespace Core {
class System;
}

namespace FileSys {
class VfsFilesystem;
}

namespace Kernel {
class ClientPort;
class ServerPort;
@@ -21,10 +29,6 @@ class ServerSession;
class HLERequestContext;
} // namespace Kernel

namespace FileSys {
class VfsFilesystem;
}

namespace Service {

namespace SM {
@@ -178,7 +182,8 @@ private:
};

/// Initialize ServiceManager
void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs);
void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
          FileSys::VfsFilesystem& vfs);

/// Shutdown ServiceManager
void Shutdown();
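The NVFlinger change is the clearest statement of the series' dependency-injection shape: the timing instance arrives by reference in the constructor, the recurring event is registered and scheduled there, and the destructor unschedules it. Reduced to its timing-related skeleton (a sketch, not the full class):

```cpp
// Sketch: tie a recurring event's lifetime to its owning object.
class ComposerSketch {
public:
    explicit ComposerSketch(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
        event = core_timing.RegisterEvent(
            "ScreenComposition", [this](u64 /*userdata*/, int cycles_late) {
                Compose();
                // Reschedule relative to lateness to hold a steady 60 Hz.
                this->core_timing.ScheduleEvent(frame_ticks - cycles_late, event);
            });
        core_timing.ScheduleEvent(frame_ticks, event);
    }
    ~ComposerSketch() {
        core_timing.UnscheduleEvent(event, 0); // the event must not outlive this object
    }

private:
    void Compose() {}

    static constexpr u64 frame_ticks = Core::Timing::BASE_CLOCK_RATE / 60;
    Core::Timing::EventType* event = nullptr;
    Core::Timing::CoreTiming& core_timing;
};
```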
@@ -5,6 +5,7 @@
#include <chrono>
#include <ctime>
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/ipc_helpers.h"
@@ -106,8 +107,9 @@ private:
    void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) {
        LOG_DEBUG(Service_Time, "called");

        SteadyClockTimePoint steady_clock_time_point{
            CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000};
        const auto& core_timing = Core::System::GetInstance().CoreTiming();
        const SteadyClockTimePoint steady_clock_time_point{
            Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000};
        IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2};
        rb.Push(RESULT_SUCCESS);
        rb.PushRaw(steady_clock_time_point);
@@ -281,8 +283,9 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
        return;
    }

    const auto& core_timing = Core::System::GetInstance().CoreTiming();
    const SteadyClockTimePoint steady_clock_time_point{
        CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000, {}};
        Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000, {}};

    CalendarTime calendar_time{};
    calendar_time.year = tm->tm_year + 1900;
@@ -28,100 +28,103 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) {
    REQUIRE(lateness == cycles_late);
}

class ScopeInit final {
public:
struct ScopeInit final {
    ScopeInit() {
        CoreTiming::Init();
        core_timing.Initialize();
    }
    ~ScopeInit() {
        CoreTiming::Shutdown();
        core_timing.Shutdown();
    }

    Core::Timing::CoreTiming core_timing;
};

static void AdvanceAndCheck(u32 idx, int downcount, int expected_lateness = 0,
                            int cpu_downcount = 0) {
static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, int downcount,
                            int expected_lateness = 0, int cpu_downcount = 0) {
    callbacks_ran_flags = 0;
    expected_callback = CB_IDS[idx];
    lateness = expected_lateness;

    CoreTiming::AddTicks(CoreTiming::GetDowncount() -
                         cpu_downcount); // Pretend we executed X cycles of instructions.
    CoreTiming::Advance();
    // Pretend we executed X cycles of instructions.
    core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount);
    core_timing.Advance();

    REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
    REQUIRE(downcount == CoreTiming::GetDowncount());
    REQUIRE(downcount == core_timing.GetDowncount());
}

TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
    ScopeInit guard;
    auto& core_timing = guard.core_timing;

    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
    CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>);
    CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>);
    Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
    Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
    Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
    Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
    Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);

    // Enter slice 0
    CoreTiming::Advance();
    core_timing.Advance();

    // D -> B -> C -> A -> E
    CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]);
    REQUIRE(1000 == CoreTiming::GetDowncount());
    CoreTiming::ScheduleEvent(500, cb_b, CB_IDS[1]);
    REQUIRE(500 == CoreTiming::GetDowncount());
    CoreTiming::ScheduleEvent(800, cb_c, CB_IDS[2]);
    REQUIRE(500 == CoreTiming::GetDowncount());
    CoreTiming::ScheduleEvent(100, cb_d, CB_IDS[3]);
    REQUIRE(100 == CoreTiming::GetDowncount());
    CoreTiming::ScheduleEvent(1200, cb_e, CB_IDS[4]);
    REQUIRE(100 == CoreTiming::GetDowncount());
    core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
    REQUIRE(1000 == core_timing.GetDowncount());
    core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]);
    REQUIRE(500 == core_timing.GetDowncount());
    core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]);
    REQUIRE(500 == core_timing.GetDowncount());
    core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]);
    REQUIRE(100 == core_timing.GetDowncount());
    core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]);
    REQUIRE(100 == core_timing.GetDowncount());

    AdvanceAndCheck(3, 400);
    AdvanceAndCheck(1, 300);
    AdvanceAndCheck(2, 200);
    AdvanceAndCheck(0, 200);
    AdvanceAndCheck(4, MAX_SLICE_LENGTH);
    AdvanceAndCheck(core_timing, 3, 400);
    AdvanceAndCheck(core_timing, 1, 300);
    AdvanceAndCheck(core_timing, 2, 200);
    AdvanceAndCheck(core_timing, 0, 200);
    AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
}

TEST_CASE("CoreTiming[Threadsave]", "[core]") {
    ScopeInit guard;
    auto& core_timing = guard.core_timing;

    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
    CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>);
    CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>);
    Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
    Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
    Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
    Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
    Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);

    // Enter slice 0
    CoreTiming::Advance();
    core_timing.Advance();

    // D -> B -> C -> A -> E
    CoreTiming::ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
    core_timing.ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
    // Manually force since ScheduleEventThreadsafe doesn't call it
    CoreTiming::ForceExceptionCheck(1000);
    REQUIRE(1000 == CoreTiming::GetDowncount());
    CoreTiming::ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
    core_timing.ForceExceptionCheck(1000);
    REQUIRE(1000 == core_timing.GetDowncount());
    core_timing.ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
    // Manually force since ScheduleEventThreadsafe doesn't call it
    CoreTiming::ForceExceptionCheck(500);
    REQUIRE(500 == CoreTiming::GetDowncount());
    CoreTiming::ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
    core_timing.ForceExceptionCheck(500);
    REQUIRE(500 == core_timing.GetDowncount());
    core_timing.ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
    // Manually force since ScheduleEventThreadsafe doesn't call it
    CoreTiming::ForceExceptionCheck(800);
    REQUIRE(500 == CoreTiming::GetDowncount());
    CoreTiming::ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
    core_timing.ForceExceptionCheck(800);
    REQUIRE(500 == core_timing.GetDowncount());
    core_timing.ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
    // Manually force since ScheduleEventThreadsafe doesn't call it
    CoreTiming::ForceExceptionCheck(100);
    REQUIRE(100 == CoreTiming::GetDowncount());
    CoreTiming::ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
    core_timing.ForceExceptionCheck(100);
    REQUIRE(100 == core_timing.GetDowncount());
    core_timing.ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
    // Manually force since ScheduleEventThreadsafe doesn't call it
    CoreTiming::ForceExceptionCheck(1200);
    REQUIRE(100 == CoreTiming::GetDowncount());
    core_timing.ForceExceptionCheck(1200);
    REQUIRE(100 == core_timing.GetDowncount());

    AdvanceAndCheck(3, 400);
    AdvanceAndCheck(1, 300);
    AdvanceAndCheck(2, 200);
    AdvanceAndCheck(0, 200);
    AdvanceAndCheck(4, MAX_SLICE_LENGTH);
    AdvanceAndCheck(core_timing, 3, 400);
    AdvanceAndCheck(core_timing, 1, 300);
    AdvanceAndCheck(core_timing, 2, 200);
    AdvanceAndCheck(core_timing, 0, 200);
    AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
}
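The fixture change mirrors the production change: instead of global `Init()`/`Shutdown()`, each test owns its `CoreTiming` instance through the RAII `ScopeInit` struct, and `AdvanceAndCheck` receives it explicitly. Usage reduces to this sketch (the final downcount value depends on what else is scheduled):

```cpp
TEST_CASE("Core::Timing[Sketch]", "[core]") {
    ScopeInit guard;                 // Initialize() now, Shutdown() at scope exit
    auto& core_timing = guard.core_timing;

    Core::Timing::EventType* cb = core_timing.RegisterEvent("cb", CallbackTemplate<0>);
    core_timing.Advance();           // enter slice 0
    core_timing.ScheduleEvent(100, cb, CB_IDS[0]);
    AdvanceAndCheck(core_timing, 0, MAX_SLICE_LENGTH);
}
```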
namespace SharedSlotTest {
@@ -142,59 +145,63 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
    using namespace SharedSlotTest;

    ScopeInit guard;
    auto& core_timing = guard.core_timing;

    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", FifoCallback<0>);
    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", FifoCallback<1>);
    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", FifoCallback<2>);
    CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", FifoCallback<3>);
    CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", FifoCallback<4>);
    Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", FifoCallback<0>);
    Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", FifoCallback<1>);
    Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", FifoCallback<2>);
    Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", FifoCallback<3>);
    Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", FifoCallback<4>);

    CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]);
    CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]);
    CoreTiming::ScheduleEvent(1000, cb_c, CB_IDS[2]);
    CoreTiming::ScheduleEvent(1000, cb_d, CB_IDS[3]);
    CoreTiming::ScheduleEvent(1000, cb_e, CB_IDS[4]);
    core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
    core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
    core_timing.ScheduleEvent(1000, cb_c, CB_IDS[2]);
    core_timing.ScheduleEvent(1000, cb_d, CB_IDS[3]);
    core_timing.ScheduleEvent(1000, cb_e, CB_IDS[4]);

    // Enter slice 0
    CoreTiming::Advance();
    REQUIRE(1000 == CoreTiming::GetDowncount());
    core_timing.Advance();
    REQUIRE(1000 == core_timing.GetDowncount());

    callbacks_ran_flags = 0;
    counter = 0;
    lateness = 0;
    CoreTiming::AddTicks(CoreTiming::GetDowncount());
    CoreTiming::Advance();
    REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount());
    core_timing.AddTicks(core_timing.GetDowncount());
    core_timing.Advance();
    REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
    REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong());
}

TEST_CASE("CoreTiming[PredictableLateness]", "[core]") {
TEST_CASE("Core::Timing[PredictableLateness]", "[core]") {
    ScopeInit guard;
    auto& core_timing = guard.core_timing;

    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
    Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
    Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);

    // Enter slice 0
    CoreTiming::Advance();
    core_timing.Advance();

    CoreTiming::ScheduleEvent(100, cb_a, CB_IDS[0]);
    CoreTiming::ScheduleEvent(200, cb_b, CB_IDS[1]);
    core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]);
    core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]);

    AdvanceAndCheck(0, 90, 10, -10); // (100 - 10)
    AdvanceAndCheck(1, MAX_SLICE_LENGTH, 50, -50);
    AdvanceAndCheck(core_timing, 0, 90, 10, -10); // (100 - 10)
    AdvanceAndCheck(core_timing, 1, MAX_SLICE_LENGTH, 50, -50);
}

namespace ChainSchedulingTest {
static int reschedules = 0;

static void RescheduleCallback(u64 userdata, s64 cycles_late) {
static void RescheduleCallback(Core::Timing::CoreTiming& core_timing, u64 userdata,
                               s64 cycles_late) {
    --reschedules;
    REQUIRE(reschedules >= 0);
    REQUIRE(lateness == cycles_late);

    if (reschedules > 0)
        CoreTiming::ScheduleEvent(1000, reinterpret_cast<CoreTiming::EventType*>(userdata),
    if (reschedules > 0) {
        core_timing.ScheduleEvent(1000, reinterpret_cast<Core::Timing::EventType*>(userdata),
                                  userdata);
    }
}
} // namespace ChainSchedulingTest

@@ -202,36 +209,39 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
    using namespace ChainSchedulingTest;

    ScopeInit guard;
    auto& core_timing = guard.core_timing;

    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
    CoreTiming::EventType* cb_rs =
        CoreTiming::RegisterEvent("callbackReschedule", RescheduleCallback);
    Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
    Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
    Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
    Core::Timing::EventType* cb_rs = core_timing.RegisterEvent(
        "callbackReschedule", [&core_timing](u64 userdata, s64 cycles_late) {
            RescheduleCallback(core_timing, userdata, cycles_late);
        });

    // Enter slice 0
    CoreTiming::Advance();
    core_timing.Advance();

    CoreTiming::ScheduleEvent(800, cb_a, CB_IDS[0]);
    CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]);
    CoreTiming::ScheduleEvent(2200, cb_c, CB_IDS[2]);
    CoreTiming::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
    REQUIRE(800 == CoreTiming::GetDowncount());
    core_timing.ScheduleEvent(800, cb_a, CB_IDS[0]);
    core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
    core_timing.ScheduleEvent(2200, cb_c, CB_IDS[2]);
    core_timing.ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
    REQUIRE(800 == core_timing.GetDowncount());

    reschedules = 3;
    AdvanceAndCheck(0, 200);  // cb_a
    AdvanceAndCheck(1, 1000); // cb_b, cb_rs
    AdvanceAndCheck(core_timing, 0, 200);  // cb_a
    AdvanceAndCheck(core_timing, 1, 1000); // cb_b, cb_rs
    REQUIRE(2 == reschedules);

    CoreTiming::AddTicks(CoreTiming::GetDowncount());
    CoreTiming::Advance(); // cb_rs
    core_timing.AddTicks(core_timing.GetDowncount());
    core_timing.Advance(); // cb_rs
    REQUIRE(1 == reschedules);
    REQUIRE(200 == CoreTiming::GetDowncount());
    REQUIRE(200 == core_timing.GetDowncount());

    AdvanceAndCheck(2, 800); // cb_c
    AdvanceAndCheck(core_timing, 2, 800); // cb_c

    CoreTiming::AddTicks(CoreTiming::GetDowncount());
    CoreTiming::Advance(); // cb_rs
    core_timing.AddTicks(core_timing.GetDowncount());
    core_timing.Advance(); // cb_rs
    REQUIRE(0 == reschedules);
    REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount());
    REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
}
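Because registered callbacks keep the fixed `(u64 userdata, s64 cycles_late)` signature, the chain-scheduling test threads `core_timing` into `RescheduleCallback` through a capturing lambda instead of widening the callback type. The adapter in isolation:

```cpp
// Sketch: adapt a callback that needs the timing instance to the fixed
// (u64, s64) signature via a reference capture. This is only safe because
// the test's core_timing outlives every event scheduled against it.
Core::Timing::EventType* cb_rs = core_timing.RegisterEvent(
    "callbackReschedule", [&core_timing](u64 userdata, s64 cycles_late) {
        RescheduleCallback(core_timing, userdata, cycles_late);
    });
```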
@@ -5,12 +5,12 @@ add_library(video_core STATIC
    debug_utils/debug_utils.h
    engines/fermi_2d.cpp
    engines/fermi_2d.h
    engines/kepler_compute.cpp
    engines/kepler_compute.h
    engines/kepler_memory.cpp
    engines/kepler_memory.h
    engines/maxwell_3d.cpp
    engines/maxwell_3d.h
    engines/maxwell_compute.cpp
    engines/maxwell_compute.h
    engines/maxwell_dma.cpp
    engines/maxwell_dma.h
    engines/shader_bytecode.h
@@ -101,6 +101,16 @@ add_library(video_core STATIC
    video_core.h
)

if (ENABLE_VULKAN)
    target_sources(video_core PRIVATE
        renderer_vulkan/declarations.h
        renderer_vulkan/vk_device.cpp
        renderer_vulkan/vk_device.h)

    target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
endif()

create_target_directory_groups(video_core)

target_link_libraries(video_core PUBLIC common core)
34
src/video_core/engines/kepler_compute.cpp
Normal file
@@ -0,0 +1,34 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/memory_manager.h"

namespace Tegra::Engines {

KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}

KeplerCompute::~KeplerCompute() = default;

void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
               "Invalid KeplerCompute register, increase the size of the Regs structure");

    regs.reg_array[method_call.method] = method_call.argument;

    switch (method_call.method) {
    case KEPLER_COMPUTE_REG_INDEX(launch):
        // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
        // kernels)
        UNREACHABLE_MSG("Compute shaders are not implemented");
        break;
    default:
        break;
    }
}

} // namespace Tegra::Engines
@@ -10,47 +10,48 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"

namespace Tegra::Engines {

#define MAXWELL_COMPUTE_REG_INDEX(field_name) \
    (offsetof(Tegra::Engines::MaxwellCompute::Regs, field_name) / sizeof(u32))
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
    (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))

class MaxwellCompute final {
class KeplerCompute final {
public:
    MaxwellCompute() = default;
    ~MaxwellCompute() = default;
    explicit KeplerCompute(MemoryManager& memory_manager);
    ~KeplerCompute();

    static constexpr std::size_t NumConstBuffers = 8;

    struct Regs {
        static constexpr std::size_t NUM_REGS = 0xCF8;

        union {
            struct {
                INSERT_PADDING_WORDS(0x281);
                INSERT_PADDING_WORDS(0xAF);

                union {
                    u32 compute_end;
                    BitField<0, 1, u32> unknown;
                } compute;
                u32 launch;

                INSERT_PADDING_WORDS(0xA76);
                INSERT_PADDING_WORDS(0xC48);
            };
            std::array<u32, NUM_REGS> reg_array;
        };
    } regs{};

    static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
                  "MaxwellCompute Regs has wrong size");
                  "KeplerCompute Regs has wrong size");

    MemoryManager& memory_manager;

    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);
};

#define ASSERT_REG_POSITION(field_name, position) \
    static_assert(offsetof(MaxwellCompute::Regs, field_name) == position * 4, \
    static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
                  "Field " #field_name " has invalid position")

ASSERT_REG_POSITION(compute, 0x281);
ASSERT_REG_POSITION(launch, 0xAF);

#undef ASSERT_REG_POSITION
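`KEPLER_COMPUTE_REG_INDEX` turns a field of the register struct into a method index by dividing its byte offset by the 4-byte register width, which is what lets `CallMethod` switch on `KEPLER_COMPUTE_REG_INDEX(launch)`. Worked through with the layout above: `launch` follows `0xAF` padding words, so its index is `0xAF`, and `0xAF + 1 + 0xC48 = 0xCF8 = NUM_REGS`, which is exactly what the size `static_assert` verifies.

```cpp
// ASSERT_REG_POSITION(launch, 0xAF) above expands to the same check:
static_assert(offsetof(Tegra::Engines::KeplerCompute::Regs, launch) / sizeof(u32) == 0xAF,
              "launch register index mismatch");
```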
@@ -317,7 +317,7 @@ void Maxwell3D::ProcessQueryGet() {
        LongQueryResult query_result{};
        query_result.value = result;
        // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
        query_result.timestamp = CoreTiming::GetTicks();
        query_result.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
        Memory::WriteBlock(*address, &query_result, sizeof(query_result));
    }
    dirty_flags.OnMemoryWrite();

@@ -1,28 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/engines/maxwell_compute.h"

namespace Tegra::Engines {

void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) {
    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
               "Invalid MaxwellCompute register, increase the size of the Regs structure");

    regs.reg_array[method_call.method] = method_call.argument;

    switch (method_call.method) {
    case MAXWELL_COMPUTE_REG_INDEX(compute): {
        LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented");
        UNREACHABLE();
        break;
    }
    default:
        break;
    }
}

} // namespace Tegra::Engines
@@ -186,7 +186,7 @@ enum class SubOp : u64 {
};

enum class F2iRoundingOp : u64 {
    None = 0,
    RoundEven = 0,
    Floor = 1,
    Ceil = 2,
    Trunc = 3,

@@ -3,12 +3,13 @@
// Refer to the license.txt file included.

#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_compute.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
@@ -18,6 +19,7 @@ namespace Tegra {
u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
    switch (format) {
    case PixelFormat::ABGR8:
    case PixelFormat::BGRA8:
        return 4;
    default:
        return 4;
@@ -31,7 +33,7 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
    dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
    maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
    fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
    maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
    kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager);
    kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager);
}
@@ -245,8 +247,8 @@ void GPU::CallEngineMethod(const MethodCall& method_call) {
    case EngineID::MAXWELL_B:
        maxwell_3d->CallMethod(method_call);
        break;
    case EngineID::MAXWELL_COMPUTE_B:
        maxwell_compute->CallMethod(method_call);
    case EngineID::KEPLER_COMPUTE_B:
        kepler_compute->CallMethod(method_call);
        break;
    case EngineID::MAXWELL_DMA_COPY_A:
        maxwell_dma->CallMethod(method_call);
@@ -282,7 +284,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
        block.sequence = regs.semaphore_sequence;
        // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
        // CoreTiming
        block.timestamp = CoreTiming::GetTicks();
        block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
        Memory::WriteBlock(*address, &block, sizeof(block));
    } else {
        const auto address =
@@ -80,6 +80,7 @@ class DebugContext;
struct FramebufferConfig {
    enum class PixelFormat : u32 {
        ABGR8 = 1,
        BGRA8 = 5,
    };

    /**
@@ -102,15 +103,15 @@ struct FramebufferConfig {
namespace Engines {
class Fermi2D;
class Maxwell3D;
class MaxwellCompute;
class MaxwellDMA;
class KeplerCompute;
class KeplerMemory;
} // namespace Engines

enum class EngineID {
    FERMI_TWOD_A = 0x902D, // 2D Engine
    MAXWELL_B = 0xB197,    // 3D Engine
    MAXWELL_COMPUTE_B = 0xB1C0,
    KEPLER_COMPUTE_B = 0xB1C0,
    KEPLER_INLINE_TO_MEMORY_B = 0xA140,
    MAXWELL_DMA_COPY_A = 0xB0B5,
};
@@ -208,7 +209,7 @@ private:
    /// 2D engine
    std::unique_ptr<Engines::Fermi2D> fermi_2d;
    /// Compute engine
    std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
    /// DMA engine
    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
    /// Inline memory engine
@@ -125,6 +125,9 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,

    params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
    params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
    if (!params.is_tiled) {
        params.pitch = config.tic.Pitch();
    }
    params.unaligned_height = config.tic.Height();
    params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
    params.identity = SurfaceClass::Uploaded;
@@ -191,7 +194,13 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
        config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
    params.width = config.width;
    if (params.is_tiled) {
        params.width = config.width;
    } else {
        params.pitch = config.width;
        const u32 bpp = params.GetFormatBpp() / 8;
        params.width = params.pitch / bpp;
    }
    params.height = config.height;
    params.unaligned_height = config.height;
    params.target = SurfaceTarget::Texture2D;
@@ -694,9 +703,20 @@ void CachedSurface::LoadGLBuffer() {
        for (u32 i = 0; i < params.max_mip_level; i++)
            SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
    } else {
        const auto texture_src_data{Memory::GetPointer(params.addr)};
        const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
        gl_buffer[0].assign(texture_src_data, texture_src_data_end);
        const u32 bpp = params.GetFormatBpp() / 8;
        const u32 copy_size = params.width * bpp;
        if (params.pitch == copy_size) {
            std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr),
                        params.size_in_bytes_gl);
        } else {
            const u8* start = Memory::GetPointer(params.addr);
            u8* write_to = gl_buffer[0].data();
            for (u32 h = params.height; h > 0; h--) {
                std::memcpy(write_to, start, copy_size);
                start += params.pitch;
                write_to += copy_size;
            }
        }
    }
    for (u32 i = 0; i < params.max_mip_level; i++) {
        ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
@@ -733,7 +753,19 @@ void CachedSurface::FlushGLBuffer() {

        SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
    } else {
        std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes());
        const u32 bpp = params.GetFormatBpp() / 8;
        const u32 copy_size = params.width * bpp;
        if (params.pitch == copy_size) {
            std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes());
        } else {
            u8* start = Memory::GetPointer(params.addr);
            const u8* read_to = gl_buffer[0].data();
            for (u32 h = params.height; h > 0; h--) {
                std::memcpy(start, read_to, copy_size);
                start += params.pitch;
                read_to += copy_size;
            }
        }
    }
}
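Both hunks implement the same row-by-row copy for linear surfaces whose pitch exceeds the packed row size: pitch-aligned surfaces carry padding bytes at the end of each row that must be skipped when moving data to or from the tightly packed GL buffer. Factored into a helper (a sketch; the real code inlines it in both directions):

```cpp
#include <cstring>

// Sketch: copy `height` rows of `row_bytes` each from a pitched source into
// a packed destination. The fast path applies when pitch == row_bytes,
// i.e. there is no per-row padding. u8/u32 are yuzu's common_types aliases.
void CopyPitchedRows(u8* dst, const u8* src, u32 height, u32 row_bytes, u32 src_pitch) {
    if (src_pitch == row_bytes) {
        std::memcpy(dst, src, std::size_t{height} * row_bytes);
        return;
    }
    for (u32 h = 0; h < height; ++h) {
        std::memcpy(dst, src, row_bytes);
        src += src_pitch; // skip the padding at the end of each source row
        dst += row_bytes;
    }
}
```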
@@ -859,8 +891,8 @@ void CachedSurface::EnsureTextureView() {
    constexpr GLuint min_level = 0;

    glGenTextures(1, &texture_view.handle);
    glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, 0,
                  params.max_mip_level, 0, 1);
    glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level,
                  params.max_mip_level, min_layer, num_layers);
    ApplyTextureDefaults(texture_view.handle, params.max_mip_level);
    glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
                         reinterpret_cast<const GLint*>(swizzle.data()));
@@ -8,6 +8,7 @@
#include <map>
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>

#include "common/alignment.h"
@@ -35,7 +36,6 @@ using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;

struct SurfaceParams {

    enum class SurfaceClass {
        Uploaded,
        RenderTarget,
@@ -168,20 +168,27 @@ struct SurfaceParams {
    }

    u32 MipBlockDepth(u32 mip_level) const {
        if (mip_level == 0)
        if (mip_level == 0) {
            return block_depth;
        if (is_layered)
        }

        if (is_layered) {
            return 1;
        u32 depth = MipDepth(mip_level);
        }

        const u32 mip_depth = MipDepth(mip_level);
        u32 bd = 32;
        while (bd > 1 && depth * 2 <= bd) {
        while (bd > 1 && mip_depth * 2 <= bd) {
            bd >>= 1;
        }

        if (bd == 32) {
            u32 bh = MipBlockHeight(mip_level);
            if (bh >= 4)
            const u32 bh = MipBlockHeight(mip_level);
            if (bh >= 4) {
                return 16;
            }
        }

        return bd;
    }

@@ -272,6 +279,7 @@ struct SurfaceParams {
    u32 height;
    u32 depth;
    u32 unaligned_height;
    u32 pitch;
    SurfaceTarget target;
    SurfaceClass identity;
    u32 max_mip_level;
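The rewritten `MipBlockDepth` halves the candidate block depth, starting at 32, until it no longer reaches twice the mip's depth, then caps the result at 16 when nothing shrank but the block height is already large (mip level 0 short-circuits to the surface's own `block_depth` and is omitted below). As a standalone function for checking values (a sketch of the same logic):

```cpp
// Sketch of the loop above. Examples:
//   BlockDepthFor(5, 1, false)   == 8  (32 -> 16 -> 8; 5 * 2 = 10 > 8 stops it)
//   BlockDepthFor(100, 4, false) == 16 (no halving happened, bh >= 4 caps 32 at 16)
u32 BlockDepthFor(u32 mip_depth, u32 mip_block_height, bool layered) {
    if (layered) {
        return 1;
    }
    u32 bd = 32;
    while (bd > 1 && mip_depth * 2 <= bd) {
        bd >>= 1;
    }
    if (bd == 32 && mip_block_height >= 4) {
        return 16;
    }
    return bd;
}
```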
@@ -171,7 +171,7 @@ public:
|
||||
code.AddLine(fmt::format("case 0x{:x}u: {{", address));
|
||||
++code.scope;
|
||||
|
||||
VisitBasicBlock(bb);
|
||||
VisitBlock(bb);
|
||||
|
||||
--code.scope;
|
||||
code.AddLine('}');
|
||||
@@ -423,7 +423,7 @@ private:
|
||||
code.AddNewLine();
|
||||
}
|
||||
|
||||
void VisitBasicBlock(const BasicBlock& bb) {
|
||||
void VisitBlock(const NodeBlock& bb) {
|
||||
for (const Node node : bb) {
|
||||
if (const std::string expr = Visit(node); !expr.empty()) {
|
||||
code.AddLine(expr);
|
||||
@@ -575,7 +575,7 @@ private:
|
||||
code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {");
|
||||
++code.scope;
|
||||
|
||||
VisitBasicBlock(conditional->GetCode());
|
||||
VisitBlock(conditional->GetCode());
|
||||
|
||||
--code.scope;
|
||||
code.AddLine('}');
|
||||
@@ -616,17 +616,8 @@ private:
|
||||
|
||||
std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
|
||||
std::string value = VisitOperand(operation, operand_index);
|
||||
|
||||
switch (type) {
|
||||
case Type::Bool:
|
||||
case Type::Bool2:
|
||||
case Type::Float:
|
||||
return value;
|
||||
case Type::Int:
|
||||
return "ftoi(" + value + ')';
|
||||
case Type::Uint:
|
||||
return "ftou(" + value + ')';
|
||||
case Type::HalfFloat:
|
||||
case Type::HalfFloat: {
|
||||
const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
|
||||
if (!half_meta) {
|
||||
value = "toHalf2(" + value + ')';
|
||||
@@ -643,6 +634,26 @@ private:
|
||||
return "vec2(toHalf2(" + value + ")[1])";
|
||||
}
|
||||
}
|
||||
default:
|
||||
return CastOperand(value, type);
|
||||
}
|
||||
}
|
||||
|
||||
std::string CastOperand(const std::string& value, Type type) const {
|
||||
switch (type) {
|
||||
case Type::Bool:
|
||||
case Type::Bool2:
|
||||
case Type::Float:
|
||||
return value;
|
||||
case Type::Int:
|
||||
return "ftoi(" + value + ')';
|
||||
case Type::Uint:
|
||||
return "ftou(" + value + ')';
|
||||
case Type::HalfFloat:
|
||||
// Can't be handled as a stand-alone value
|
||||
UNREACHABLE();
|
||||
return value;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return value;
|
||||
}
|
||||
@@ -650,6 +661,7 @@ private:
|
||||
std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) {
|
||||
switch (type) {
|
||||
case Type::Bool:
|
||||
case Type::Bool2:
|
||||
case Type::Float:
|
||||
if (needs_parenthesis) {
|
||||
return '(' + value + ')';
|
||||
@@ -719,45 +731,51 @@ private:
        constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};

        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
-        const auto count = static_cast<u32>(operation.GetOperandsCount());
        ASSERT(meta);

+        const std::size_t count = operation.GetOperandsCount();
+        const bool has_array = meta->sampler.IsArray();
+        const bool has_shadow = meta->sampler.IsShadow();

        std::string expr = func;
        expr += '(';
        expr += GetSampler(meta->sampler);
        expr += ", ";

-        expr += coord_constructors[meta->coords_count - 1];
+        expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
        expr += '(';
-        for (u32 i = 0; i < count; ++i) {
-            const bool is_extra = i >= meta->coords_count;
-            const bool is_array = i == meta->array_index;
-
-            std::string operand = [&]() {
-                if (is_extra && is_extra_int) {
-                    if (const auto immediate = std::get_if<ImmediateNode>(operation[i])) {
-                        return std::to_string(static_cast<s32>(immediate->GetValue()));
-                    } else {
-                        return "ftoi(" + Visit(operation[i]) + ')';
-                    }
-                } else {
-                    return Visit(operation[i]);
-                }
-            }();
-            if (is_array) {
-                ASSERT(!is_extra);
-                operand = "float(ftoi(" + operand + "))";
-            }
-
-            expr += operand;
-
-            if (i + 1 == meta->coords_count) {
-                expr += ')';
-            }
-            if (i + 1 < count) {
+        for (std::size_t i = 0; i < count; ++i) {
+            expr += Visit(operation[i]);
+
+            const std::size_t next = i + 1;
+            if (next < count || has_array || has_shadow)
                expr += ", ";
-            }
        }
+        if (has_array) {
+            expr += "float(ftoi(" + Visit(meta->array) + "))";
+        }
+        if (has_shadow) {
+            if (has_array)
+                expr += ", ";
+            expr += Visit(meta->depth_compare);
+        }
+        expr += ')';
+
+        for (const Node extra : meta->extras) {
+            expr += ", ";
+            if (is_extra_int) {
+                if (const auto immediate = std::get_if<ImmediateNode>(extra)) {
+                    // Inline the string as an immediate integer in GLSL (some extra arguments are
+                    // required to be constant)
+                    expr += std::to_string(static_cast<s32>(immediate->GetValue()));
+                } else {
+                    expr += "ftoi(" + Visit(extra) + ')';
+                }
+            } else {
+                expr += Visit(extra);
+            }
+        }

        expr += ')';
        return expr;
    }
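To make the new argument layout concrete, here is a hedged, standalone sketch of the call string the rewritten builder produces: coordinates first, then the array layer as float(ftoi(...)) and the depth-compare reference inside the coordinate constructor, with the extras appended as separate arguments. Function and variable names are illustrative, not the decompiler's:

// Sketch of the emitted GLSL argument layout under the new MetaTexture fields.
#include <array>
#include <iostream>
#include <string>
#include <vector>

std::string BuildTextureCall(const std::string& sampler, std::vector<std::string> coords,
                             const std::string& array, const std::string& shadow,
                             const std::vector<std::string>& extras) {
    constexpr std::array<const char*, 4> ctors = {"float", "vec2", "vec3", "vec4"};
    const std::size_t size = coords.size() + !array.empty() + !shadow.empty();
    std::string expr = "texture(" + sampler + ", " + ctors.at(size - 1) + '(';
    if (!array.empty())
        coords.push_back("float(ftoi(" + array + "))"); // layer is an integer register
    if (!shadow.empty())
        coords.push_back(shadow); // depth-compare reference rides in the constructor
    for (std::size_t i = 0; i < coords.size(); ++i)
        expr += (i ? ", " : "") + coords[i];
    expr += ')';
    for (const auto& extra : extras)
        expr += ", " + extra; // lod/bias stay outside the coordinate constructor
    return expr + ')';
}

int main() {
    // A 2D array shadow sample with an explicit lod in the extras:
    std::cout << BuildTextureCall("sampler0", {"x", "y"}, "layer", "dc", {"lod"}) << '\n';
    // texture(sampler0, vec4(x, y, float(ftoi(layer)), dc), lod)
}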
@@ -1134,7 +1152,7 @@ private:
                          Type::HalfFloat);
    }

-    std::string F4Texture(Operation operation) {
+    std::string Texture(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);
@@ -1145,7 +1163,7 @@ private:
        return expr + GetSwizzle(meta->element);
    }

-    std::string F4TextureLod(Operation operation) {
+    std::string TextureLod(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);
@@ -1156,7 +1174,7 @@ private:
        return expr + GetSwizzle(meta->element);
    }

-    std::string F4TextureGather(Operation operation) {
+    std::string TextureGather(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);
@@ -1164,7 +1182,7 @@ private:
               GetSwizzle(meta->element);
    }

-    std::string F4TextureQueryDimensions(Operation operation) {
+    std::string TextureQueryDimensions(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);
@@ -1184,7 +1202,7 @@ private:
        return "0";
    }

-    std::string F4TextureQueryLod(Operation operation) {
+    std::string TextureQueryLod(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);
@@ -1195,29 +1213,33 @@ private:
        return "0";
    }

-    std::string F4TexelFetch(Operation operation) {
+    std::string TexelFetch(Operation operation) {
        constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"};
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
-        const auto count = static_cast<u32>(operation.GetOperandsCount());
        ASSERT(meta);
+        UNIMPLEMENTED_IF(meta->sampler.IsArray());
+        const std::size_t count = operation.GetOperandsCount();

        std::string expr = "texelFetch(";
        expr += GetSampler(meta->sampler);
        expr += ", ";

-        expr += constructors[meta->coords_count - 1];
+        expr += constructors.at(operation.GetOperandsCount() - 1);
        expr += '(';
-        for (u32 i = 0; i < count; ++i) {
+        for (std::size_t i = 0; i < count; ++i) {
            expr += VisitOperand(operation, i, Type::Int);

-            if (i + 1 == meta->coords_count) {
-                expr += ')';
-            }
-            if (i + 1 < count) {
-                expr += ", ";
-            }
+            const std::size_t next = i + 1;
+            if (next == count)
+                expr += ')';
+            else if (next < count)
+                expr += ", ";
        }
+        for (std::size_t i = 0; i < meta->extras.size(); ++i) {
+            expr += ", ";
+            expr += CastOperand(Visit(meta->extras.at(i)), Type::Int);
+        }
        expr += ')';

        return expr + GetSwizzle(meta->element);
    }
@@ -1454,12 +1476,12 @@ private:
        &GLSLDecompiler::Logical2HNotEqual,
        &GLSLDecompiler::Logical2HGreaterEqual,

-        &GLSLDecompiler::F4Texture,
-        &GLSLDecompiler::F4TextureLod,
-        &GLSLDecompiler::F4TextureGather,
-        &GLSLDecompiler::F4TextureQueryDimensions,
-        &GLSLDecompiler::F4TextureQueryLod,
-        &GLSLDecompiler::F4TexelFetch,
+        &GLSLDecompiler::Texture,
+        &GLSLDecompiler::TextureLod,
+        &GLSLDecompiler::TextureGather,
+        &GLSLDecompiler::TextureQueryDimensions,
+        &GLSLDecompiler::TextureQueryLod,
+        &GLSLDecompiler::TexelFetch,

        &GLSLDecompiler::Branch,
        &GLSLDecompiler::PushFlowStack,
@@ -107,7 +107,7 @@ RendererOpenGL::~RendererOpenGL() = default;
void RendererOpenGL::SwapBuffers(
    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {

-    Core::System::GetInstance().GetPerfStats().EndSystemFrame();
+    system.GetPerfStats().EndSystemFrame();

    // Maintain the rasterizer's state as a priority
    OpenGLState prev_state = OpenGLState::GetCurState();
@@ -137,8 +137,8 @@ void RendererOpenGL::SwapBuffers(

    render_window.PollEvents();

-    Core::System::GetInstance().FrameLimiter().DoFrameLimiting(CoreTiming::GetGlobalTimeUs());
-    Core::System::GetInstance().GetPerfStats().BeginSystemFrame();
+    system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
+    system.GetPerfStats().BeginSystemFrame();

    // Restore the rasterizer state
    prev_state.Apply();
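These two hunks trade singleton lookups for the renderer's injected Core::System reference. A minimal sketch of the pattern (class layout assumed for illustration, not copied from the repository):

// Dependency injection instead of Core::System::GetInstance(): the renderer
// receives the system once at construction and keeps a reference.
namespace Core {
class System; // forward declaration; the real type lives in core/core.h
}

class RendererSketch {
public:
    explicit RendererSketch(Core::System& system) : system{system} {}

private:
    Core::System& system; // injected once; used for perf stats, timing and frame limiting
};

Besides making the dependency explicit, this removes hidden global state from the hot SwapBuffers path, which simplifies testing and any future multi-instance use.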
45 src/video_core/renderer_vulkan/declarations.h Normal file
@@ -0,0 +1,45 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <vulkan/vulkan.hpp>

namespace Vulkan {

// vulkan.hpp unique handles use DispatchLoaderStatic by default
template <typename T>
using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>;

using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>;
using UniqueBuffer = UniqueHandle<vk::Buffer>;
using UniqueBufferView = UniqueHandle<vk::BufferView>;
using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>;
using UniqueCommandPool = UniqueHandle<vk::CommandPool>;
using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>;
using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>;
using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>;
using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>;
using UniqueDevice = UniqueHandle<vk::Device>;
using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>;
using UniqueEvent = UniqueHandle<vk::Event>;
using UniqueFence = UniqueHandle<vk::Fence>;
using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>;
using UniqueImage = UniqueHandle<vk::Image>;
using UniqueImageView = UniqueHandle<vk::ImageView>;
using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>;
using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>;
using UniquePipeline = UniqueHandle<vk::Pipeline>;
using UniquePipelineCache = UniqueHandle<vk::PipelineCache>;
using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>;
using UniqueQueryPool = UniqueHandle<vk::QueryPool>;
using UniqueRenderPass = UniqueHandle<vk::RenderPass>;
using UniqueSampler = UniqueHandle<vk::Sampler>;
using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>;
using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;

} // namespace Vulkan
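The aliases matter because vulkan.hpp's stock vk::Unique* types destroy their handles through DispatchLoaderStatic, while this backend loads everything dynamically. A hedged usage sketch, mirroring the ObjectDestroy deleter used in vk_device.cpp below; the MakeFence helper is an assumption, not repository code:

// Wrapping a raw handle so its deleter runs through dynamically loaded pointers.
#include <vulkan/vulkan.hpp>

namespace Vulkan {

template <typename T>
using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>;
using UniqueFence = UniqueHandle<vk::Fence>;

UniqueFence MakeFence(vk::Device device, const vk::DispatchLoaderDynamic& dld) {
    const vk::FenceCreateInfo fence_ci;
    vk::Fence fence = device.createFence(fence_ci, nullptr, dld);
    // The deleter captures the owning device and the dynamic dispatcher.
    return UniqueFence(fence, vk::ObjectDestroy<vk::Device, vk::DispatchLoaderDynamic>(
                                  device, nullptr, dld));
}

} // namespace Vulkan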
231 src/video_core/renderer_vulkan/vk_device.cpp Normal file
@@ -0,0 +1,231 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <map>
#include <optional>
#include <set>
#include <vector>
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"

namespace Vulkan {

namespace Alternatives {

constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
    vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
    vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};

} // namespace Alternatives

constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
    switch (format) {
    case vk::Format::eD24UnormS8Uint:
        return Alternatives::Depth24UnormS8Uint.data();
    case vk::Format::eD16UnormS8Uint:
        return Alternatives::Depth16UnormS8Uint.data();
    default:
        return nullptr;
    }
}

constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties,
                                                   FormatType format_type) {
    switch (format_type) {
    case FormatType::Linear:
        return properties.linearTilingFeatures;
    case FormatType::Optimal:
        return properties.optimalTilingFeatures;
    case FormatType::Buffer:
        return properties.bufferFeatures;
    default:
        return {};
    }
}

VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                   vk::SurfaceKHR surface)
    : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
    SetupFamilies(dldi, surface);
    SetupProperties(dldi);
}

VKDevice::~VKDevice() = default;

bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
    const auto queue_cis = GetDeviceQueueCreateInfos();
    vk::PhysicalDeviceFeatures device_features{};

    const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};
    const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
                                         0, nullptr, static_cast<u32>(extensions.size()),
                                         extensions.data(), &device_features);
    vk::Device dummy_logical;
    if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
        LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
        return false;
    }

    dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr);
    logical = UniqueDevice(
        dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));

    graphics_queue = logical->getQueue(graphics_family, 0, dld);
    present_queue = logical->getQueue(present_family, 0, dld);
    return true;
}

vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
                                        vk::FormatFeatureFlags wanted_usage,
                                        FormatType format_type) const {
    if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
        return wanted_format;
    }
    // The wanted format is not supported by hardware, search for alternatives
    const vk::Format* alternatives = GetFormatAlternatives(wanted_format);
    if (alternatives == nullptr) {
        LOG_CRITICAL(Render_Vulkan,
                     "Format={} with usage={} and type={} has no defined alternatives and host "
                     "hardware does not support it",
                     static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
                     static_cast<u32>(format_type));
        UNREACHABLE();
        return wanted_format;
    }

    std::size_t i = 0;
    for (vk::Format alternative = alternatives[0]; alternative != vk::Format{};
         alternative = alternatives[++i]) {
        if (!IsFormatSupported(alternative, wanted_usage, format_type))
            continue;
        LOG_WARNING(Render_Vulkan,
                    "Emulating format={} with alternative format={} with usage={} and type={}",
                    static_cast<u32>(wanted_format), static_cast<u32>(alternative),
                    static_cast<u32>(wanted_usage), static_cast<u32>(format_type));
        return alternative;
    }

    // No alternatives found, panic
    LOG_CRITICAL(Render_Vulkan,
                 "Format={} with usage={} and type={} is not supported by the host hardware and "
                 "doesn't support any of the alternatives",
                 static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
                 static_cast<u32>(format_type));
    UNREACHABLE();
    return wanted_format;
}

bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
                                 FormatType format_type) const {
    const auto it = format_properties.find(wanted_format);
    if (it == format_properties.end()) {
        LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}",
                     static_cast<u32>(wanted_format));
        UNREACHABLE();
        return true;
    }
    const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type);
    return (supported_usage & wanted_usage) == wanted_usage;
}

bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                          vk::SurfaceKHR surface) {
    const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME;

    bool has_swapchain{};
    for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
        has_swapchain |= prop.extensionName == swapchain_extension;
    }
    if (!has_swapchain) {
        // The device doesn't support creating swapchains.
        return false;
    }

    bool has_graphics{}, has_present{};
    const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
    for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
        const auto& family = queue_family_properties[i];
        if (family.queueCount == 0)
            continue;

        has_graphics |=
            (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
        has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
    }
    if (!has_graphics || !has_present) {
        // The device doesn't have a graphics and present queue.
        return false;
    }

    // TODO(Rodrigo): Check if the device matches all requirements.
    const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
    if (props.limits.maxUniformBufferRange < 65536) {
        return false;
    }

    // Device is suitable.
    return true;
}

void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
    std::optional<u32> graphics_family_, present_family_;

    const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
    for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
        if (graphics_family_ && present_family_)
            break;

        const auto& queue_family = queue_family_properties[i];
        if (queue_family.queueCount == 0)
            continue;

        if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics)
            graphics_family_ = i;
        if (physical.getSurfaceSupportKHR(i, surface, dldi))
            present_family_ = i;
    }
    ASSERT(graphics_family_ && present_family_);

    graphics_family = *graphics_family_;
    present_family = *present_family_;
}

void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
    const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
    device_type = props.deviceType;
    uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
}

std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
    static const float QUEUE_PRIORITY = 1.f;

    std::set<u32> unique_queue_families = {graphics_family, present_family};
    std::vector<vk::DeviceQueueCreateInfo> queue_cis;

    for (u32 queue_family : unique_queue_families)
        queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY});

    return queue_cis;
}

std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
    const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
    std::map<vk::Format, vk::FormatProperties> format_properties;

    const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) {
        format_properties.emplace(format, physical.getFormatProperties(format, dldi));
    };
    AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
    AddFormatQuery(vk::Format::eR5G6B5UnormPack16);
    AddFormatQuery(vk::Format::eD32Sfloat);
    AddFormatQuery(vk::Format::eD16UnormS8Uint);
    AddFormatQuery(vk::Format::eD24UnormS8Uint);
    AddFormatQuery(vk::Format::eD32SfloatS8Uint);

    return format_properties;
}

} // namespace Vulkan
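GetSupportedFormat walks the Alternatives arrays by raw pointer, relying on the trailing default-constructed vk::Format{} (eUndefined) as a sentinel so the loop needs no explicit length. A standalone sketch of that termination scheme, with a toy Format enum standing in for Vulkan's:

// Sentinel-terminated alternative lookup, as used by GetFormatAlternatives above.
#include <array>
#include <cstdio>

enum class Format { Undefined, D24S8, D32S8, D16S8 };

// The last element is Format{} (== Undefined), the loop's stop marker.
constexpr std::array<Format, 3> kD24S8Alternatives = {Format::D32S8, Format::D16S8, {}};

bool IsSupported(Format format) {
    return format == Format::D32S8; // pretend only D32S8 exists on this host
}

Format FirstSupported(const Format* alternatives) {
    for (std::size_t i = 0; alternatives[i] != Format{}; ++i) {
        if (IsSupported(alternatives[i]))
            return alternatives[i];
    }
    return Format::Undefined;
}

int main() {
    std::printf("%d\n", static_cast<int>(FirstSupported(kD24S8Alternatives.data())));
}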
116 src/video_core/renderer_vulkan/vk_device.h Normal file
@@ -0,0 +1,116 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <map>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"

namespace Vulkan {

/// Format usage descriptor
enum class FormatType { Linear, Optimal, Buffer };

/// Handles data specific to a physical device.
class VKDevice final {
public:
    explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                      vk::SurfaceKHR surface);
    ~VKDevice();

    /// Initializes the device. Returns true on success.
    bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance);

    /**
     * Returns a format supported by the device for the passed requirements.
     * @param wanted_format The ideal format to be returned. It may not be the returned format.
     * @param wanted_usage The usage that must be fulfilled even if the format is not supported.
     * @param format_type Format type usage.
     * @returns A format supported by the device.
     */
    vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
                                  FormatType format_type) const;

    /// Returns the dispatch loader with direct function pointers of the device.
    const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
        return dld;
    }

    /// Returns the logical device.
    vk::Device GetLogical() const {
        return logical.get();
    }

    /// Returns the physical device.
    vk::PhysicalDevice GetPhysical() const {
        return physical;
    }

    /// Returns the main graphics queue.
    vk::Queue GetGraphicsQueue() const {
        return graphics_queue;
    }

    /// Returns the main present queue.
    vk::Queue GetPresentQueue() const {
        return present_queue;
    }

    /// Returns the main graphics queue family index.
    u32 GetGraphicsFamily() const {
        return graphics_family;
    }

    /// Returns the main present queue family index.
    u32 GetPresentFamily() const {
        return present_family;
    }

    /// Returns true if the device is integrated with the host CPU.
    bool IsIntegrated() const {
        return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
    }

    /// Returns the uniform buffer alignment requirement.
    u64 GetUniformBufferAlignment() const {
        return uniform_buffer_alignment;
    }

    /// Checks if the physical device is suitable.
    static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                           vk::SurfaceKHR surface);

private:
    /// Sets up queue families.
    void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);

    /// Sets up device properties.
    void SetupProperties(const vk::DispatchLoaderDynamic& dldi);

    /// Returns a list of queue initialization descriptors.
    std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;

    /// Returns true if a format is supported.
    bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
                           FormatType format_type) const;

    /// Returns the device properties for Vulkan formats.
    static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
        const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);

    const vk::PhysicalDevice physical;  ///< Physical device
    vk::DispatchLoaderDynamic dld;      ///< Device function pointers
    UniqueDevice logical;               ///< Logical device
    vk::Queue graphics_queue;           ///< Main graphics queue
    vk::Queue present_queue;            ///< Main present queue
    u32 graphics_family{};              ///< Main graphics queue family index
    u32 present_family{};               ///< Main present queue family index
    vk::PhysicalDeviceType device_type; ///< Physical device type
    u64 uniform_buffer_alignment{};     ///< Uniform buffer alignment requirement
    std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary
};

} // namespace Vulkan
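A plausible call sequence for this interface (a sketch, not repository code): filter physical devices with IsSuitable, construct a VKDevice, then Create the logical device. The instance, surface, and instance-level dispatch loader are assumed to be set up elsewhere:

// Picking and initializing the first suitable device with the API above.
#include <memory>
#include "video_core/renderer_vulkan/vk_device.h"

namespace Vulkan {

std::unique_ptr<VKDevice> MakeDevice(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance,
                                     vk::SurfaceKHR surface) {
    for (const vk::PhysicalDevice physical : instance.enumeratePhysicalDevices(dldi)) {
        if (!VKDevice::IsSuitable(dldi, physical, surface)) {
            continue;
        }
        auto device = std::make_unique<VKDevice>(dldi, physical, surface);
        if (device->Create(dldi, instance)) {
            return device;
        }
    }
    return nullptr; // no suitable device on this host
}

} // namespace Vulkan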
@@ -121,15 +121,15 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
    return exit_method = ExitMethod::AlwaysReturn;
}

-BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
-    BasicBlock basic_block;
+NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
+    NodeBlock basic_block;
    for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
        pc = DecodeInstr(basic_block, pc);
    }
    return basic_block;
}

-u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
    // Ignore sched instructions when generating code.
    if (IsSchedInstruction(pc, main_offset)) {
        return pc + 1;
@@ -151,39 +151,38 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
    UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
                         "NeverExecute predicate not implemented");

-    static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)>
-        decoders = {
-            {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
-            {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
-            {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
-            {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
-            {OpCode::Type::Shift, &ShaderIR::DecodeShift},
-            {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
-            {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
-            {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
-            {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
-            {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
-            {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
-            {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
-            {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
-            {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
-            {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
-            {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
-            {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
-            {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
-            {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
-            {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
-            {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
-            {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
-            {OpCode::Type::Video, &ShaderIR::DecodeVideo},
-            {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
-        };
+    static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
+        {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
+        {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
+        {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
+        {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
+        {OpCode::Type::Shift, &ShaderIR::DecodeShift},
+        {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
+        {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
+        {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
+        {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
+        {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
+        {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
+        {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
+        {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
+        {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
+        {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
+        {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
+        {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
+        {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
+        {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
+        {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
+        {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
+        {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
+        {OpCode::Type::Video, &ShaderIR::DecodeVideo},
+        {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
+    };

    std::vector<Node> tmp_block;
    if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
-        pc = (this->*decoder->second)(tmp_block, bb, pc);
+        pc = (this->*decoder->second)(tmp_block, pc);
    } else {
-        pc = DecodeOther(tmp_block, bb, pc);
+        pc = DecodeOther(tmp_block, pc);
    }

    // Some instructions (like SSY) don't have a predicate field, they are always unconditionally
@@ -192,11 +191,14 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
    const auto pred_index = static_cast<u32>(instr.pred.pred_index);

    if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
-        bb.push_back(
-            Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)));
+        const Node conditional =
+            Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
+        global_code.push_back(conditional);
+        bb.push_back(conditional);
    } else {
        for (auto& node : tmp_block) {
-            bb.push_back(std::move(node));
+            global_code.push_back(node);
+            bb.push_back(node);
        }
    }
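The new bookkeeping pushes every decoded node into both the current block and a flat global_code list, so trackers such as TrackCbuf (used by the memory decoder below) can scan the whole program linearly instead of walking the block structure. An illustrative sketch of the shared-ownership push, with assumed Node/NodeBlock definitions:

// Dual-list bookkeeping: nodes are shared, so both lists reference one payload.
#include <memory>
#include <vector>

struct NodeData {};
using Node = std::shared_ptr<NodeData>;
using NodeBlock = std::vector<Node>;

struct IrSketch {
    NodeBlock global_code; // every node of the program, in decode order

    void Push(NodeBlock& bb, Node node) {
        global_code.push_back(node); // copy of the shared_ptr, not of the payload
        bb.push_back(std::move(node));
    }
};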
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::SubOp;

-u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -15,7 +15,7 @@ using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
using Tegra::Shader::Register;

-u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -41,7 +41,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3

        const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);

-        SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
@@ -242,7 +242,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3
    return pc;
}

-void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
+void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
                                    Node imm_lut, bool sets_cc) {
    constexpr u32 lop_iterations = 32;
    const Node one = Immediate(1);
@@ -284,4 +284,4 @@ void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, No
    SetRegister(bb, dest, value);
}

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
@@ -16,7 +16,7 @@ using Tegra::Shader::Pred;
using Tegra::Shader::PredicateResultMode;
using Tegra::Shader::Register;

-u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

@@ -54,9 +54,9 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock&
    return pc;
}

-void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op,
-                                   Node op_a, Node op_b, PredicateResultMode predicate_mode,
-                                   Pred predicate, bool sets_cc) {
+void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
+                                   Node op_b, PredicateResultMode predicate_mode, Pred predicate,
+                                   bool sets_cc) {
    const Node result = [&]() {
        switch (logic_op) {
        case LogicOperation::And:
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;

-u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

@@ -118,8 +118,8 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {

        value = [&]() {
            switch (instr.conversion.f2i.rounding) {
            case Tegra::Shader::F2iRoundingOp::None:
                return value;
            case Tegra::Shader::F2iRoundingOp::RoundEven:
                return Operation(OperationCode::FRoundEven, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Floor:
                return Operation(OperationCode::FFloor, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Ceil:
@@ -146,4 +146,4 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    return pc;
}

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;

-u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -14,7 +14,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;

-u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -16,7 +16,7 @@ using Tegra::Shader::HalfType;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;

-u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -36,7 +36,7 @@ static std::size_t GetCoordCount(TextureType texture_type) {
    }
}

-u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

@@ -160,7 +160,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    }();

    const Node addr_register = GetRegister(instr.gpr8);
-    const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size()));
+    const Node base_address =
+        TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
    const auto cbuf = std::get_if<CbufNode>(base_address);
    ASSERT(cbuf != nullptr);
    const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
@@ -305,7 +306,6 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    case OpCode::Id::TLD4S: {
        UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");
-
        if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
        }
@@ -314,9 +314,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
        const Node op_a = GetRegister(instr.gpr8);
        const Node op_b = GetRegister(instr.gpr20);

-        std::vector<Node> coords;
-
        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
+        std::vector<Node> coords;
        if (depth_compare) {
            // Note: TLD4S coordinate encoding works just like TEXS's
            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
@@ -327,18 +326,17 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
            coords.push_back(op_a);
            coords.push_back(op_b);
        }
-        const auto num_coords = static_cast<u32>(coords.size());
-        coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
+        std::vector<Node> extras;
+        extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));

        const auto& sampler =
            GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);

        Node4 values;
        for (u32 element = 0; element < values.size(); ++element) {
-            auto params = coords;
-            MetaTexture meta{sampler, element, num_coords};
-            values[element] =
-                Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
+            auto coords_copy = coords;
+            MetaTexture meta{sampler, {}, {}, extras, element};
+            values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
        }

        WriteTexsInstructionFloat(bb, instr, values);
@@ -359,12 +357,13 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
        switch (instr.txq.query_type) {
        case Tegra::Shader::TextureQueryType::Dimension: {
            for (u32 element = 0; element < 4; ++element) {
-                if (instr.txq.IsComponentEnabled(element)) {
-                    MetaTexture meta{sampler, element};
-                    const Node value = Operation(OperationCode::F4TextureQueryDimensions,
-                                                 std::move(meta), GetRegister(instr.gpr8));
-                    SetTemporal(bb, indexer++, value);
+                if (!instr.txq.IsComponentEnabled(element)) {
+                    continue;
                }
+                MetaTexture meta{sampler, {}, {}, {}, element};
+                const Node value =
+                    Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
+                SetTemporal(bb, indexer++, value);
            }
            for (u32 i = 0; i < indexer; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
@@ -411,9 +410,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {

        for (u32 element = 0; element < 2; ++element) {
            auto params = coords;
-            MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())};
-            const Node value =
-                Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params));
+            MetaTexture meta{sampler, {}, {}, {}, element};
+            const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
            SetTemporal(bb, element, value);
        }
        for (u32 element = 0; element < 2; ++element) {
@@ -431,7 +429,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");

        if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
+            LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
        }

        WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
@@ -464,8 +462,7 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
    return *used_samplers.emplace(entry).first;
}

-void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
-                                        const Node4& components) {
+void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
    u32 dest_elem = 0;
    for (u32 elem = 0; elem < 4; ++elem) {
        if (!instr.tex.IsComponentEnabled(elem)) {
@@ -480,7 +477,7 @@ void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
    }
}

-void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
+void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
                                         const Node4& components) {
    // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
    // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
@@ -504,7 +501,7 @@ void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
    }
}

-void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr,
+void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
                                             const Node4& components) {
    // TEXS.F16 destination registers are packed in two registers in pairs (just like any half
    // float instruction).
@@ -535,15 +532,16 @@ void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr,
}

Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
-                               TextureProcessMode process_mode, bool depth_compare, bool is_array,
-                               std::size_t array_offset, std::size_t bias_offset,
-                               std::vector<Node>&& coords) {
-    UNIMPLEMENTED_IF_MSG(
-        (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
-            (texture_type == TextureType::TextureCube && is_array && depth_compare),
-        "This method is not supported.");
-
-    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+                               TextureProcessMode process_mode, std::vector<Node> coords,
+                               Node array, Node depth_compare, u32 bias_offset) {
+    const bool is_array = array;
+    const bool is_shadow = depth_compare;
+
+    UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
+                             (texture_type == TextureType::TextureCube && is_array && is_shadow),
+                         "This method is not supported.");
+
+    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);

    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
@@ -552,35 +550,30 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
    // LOD selection (either via bias or explicit textureLod) not supported in GL for
    // sampler2DArrayShadow and samplerCubeArrayShadow.
    const bool gl_lod_supported =
-        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
-          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));
+        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
+          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));

    const OperationCode read_method =
-        lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture;
+        lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;

    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);

-    std::optional<u32> array_offset_value;
-    if (is_array)
-        array_offset_value = static_cast<u32>(array_offset);
-
-    const auto coords_count = static_cast<u32>(coords.size());
-
+    std::vector<Node> extras;
    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
        if (process_mode == TextureProcessMode::LZ) {
-            coords.push_back(Immediate(0.0f));
+            extras.push_back(Immediate(0.0f));
        } else {
            // If present, lod or bias are always stored in the register indexed by the gpr20
            // field with an offset depending on the usage of the other registers
-            coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
+            extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
        }
    }

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
-        auto params = coords;
-        MetaTexture meta{sampler, element, coords_count, array_offset_value};
-        values[element] = Operation(read_method, std::move(meta), std::move(params));
+        auto copy_coords = coords;
+        MetaTexture meta{sampler, array, depth_compare, extras, element};
+        values[element] = Operation(read_method, meta, std::move(copy_coords));
    }

    return values;
@@ -602,28 +595,22 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }
-    // 1D.DC in opengl the 2nd component is ignored.
+    // 1D.DC in OpenGL the 2nd component is ignored.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        coords.push_back(Immediate(0.0f));
    }
-    std::size_t array_offset{};
-    if (is_array) {
-        array_offset = coords.size();
-        coords.push_back(GetRegister(array_register));
-    }
+
+    const Node array = is_array ? GetRegister(array_register) : nullptr;
+
+    Node dc{};
    if (depth_compare) {
-        // Depth is always stored in the register signaled by gpr20
-        // or in the next register if lod or bias are used
+        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
+        // or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
-        coords.push_back(GetRegister(depth_register));
-    }
-    // Fill ignored coordinates
-    while (coords.size() < total_coord_count) {
-        coords.push_back(Immediate(0));
+        dc = GetRegister(depth_register);
    }

-    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
-                          0, std::move(coords));
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
}

Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -641,6 +628,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;
+    const u32 bias_offset = coord_count > 2 ? 1 : 0;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
@@ -648,24 +636,17 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

-    std::size_t array_offset{};
-    if (is_array) {
-        array_offset = coords.size();
-        coords.push_back(GetRegister(array_register));
-    }
+    const Node array = is_array ? GetRegister(array_register) : nullptr;
+
+    Node dc{};
    if (depth_compare) {
-        // Depth is always stored in the register signaled by gpr20
-        // or in the next register if lod or bias are used
+        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
+        // or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
-        coords.push_back(GetRegister(depth_register));
-    }
-    // Fill ignored coordinates
-    while (coords.size() < total_coord_count) {
-        coords.push_back(Immediate(0));
+        dc = GetRegister(depth_register);
    }

-    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
-                          (coord_count > 2 ? 1 : 0), std::move(coords));
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
}

Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
@@ -680,24 +661,16 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
-
-    for (size_t i = 0; i < coord_count; ++i) {
+    for (size_t i = 0; i < coord_count; ++i)
        coords.push_back(GetRegister(coord_register + i));
-    }
-    std::optional<u32> array_offset;
-    if (is_array) {
-        array_offset = static_cast<u32>(coords.size());
-        coords.push_back(GetRegister(array_register));
-    }

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
-        auto params = coords;
-        MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset};
-        values[element] =
-            Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
+        auto coords_copy = coords;
+        MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
+        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
    }

    return values;
@@ -705,7 +678,6 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de

Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
    const std::size_t type_coord_count = GetCoordCount(texture_type);
-    const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;

    // If enabled arrays index is always stored in the gpr8 field
@@ -719,33 +691,22 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
        : coord_register + 1;

    std::vector<Node> coords;
-
    for (std::size_t i = 0; i < type_coord_count; ++i) {
        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }
-    std::optional<u32> array_offset;
-    if (is_array) {
-        array_offset = static_cast<u32>(coords.size());
-        coords.push_back(GetRegister(array_register));
-    }
-    const auto coords_count = static_cast<u32>(coords.size());

-    if (lod_enabled) {
-        // When lod is used always is in grp20
-        coords.push_back(GetRegister(instr.gpr20));
-    } else {
-        coords.push_back(Immediate(0));
-    }
+    const Node array = is_array ? GetRegister(array_register) : nullptr;
+    // When lod is used it is always stored in gpr20
+    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
-        auto params = coords;
-        MetaTexture meta{sampler, element, coords_count, array_offset};
-        values[element] =
-            Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params));
+        auto coords_copy = coords;
+        MetaTexture meta{sampler, array, {}, {lod}, element};
+        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }
    return values;
}
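All of these texture hunks build the widened MetaTexture initializer {sampler, array, depth_compare, extras, element}. The struct's shape below is inferred from those brace initializers and is an assumption for illustration; the real definition lives in the shader IR headers:

// Assumed shape of the widened MetaTexture metadata carried by texture operations.
#include <cstdint>
#include <memory>
#include <vector>

struct Sampler {};
struct NodeData {};
using Node = std::shared_ptr<NodeData>;

struct MetaTexture {
    const Sampler& sampler;
    Node array;               // layer index; nullptr when the target is not an array
    Node depth_compare;       // reference value; nullptr when not a shadow sampler
    std::vector<Node> extras; // lod/bias/component operands, no longer mixed into coords
    std::uint32_t element;    // which of the four sampled/gathered components
};

Splitting array, depth compare, and extras out of the coordinate vector is what lets the GLSL decompiler above place each operand deterministically instead of recovering them by index arithmetic.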
@@ -14,7 +14,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;

-u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;

-u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
Some files were not shown because too many files have changed in this diff.