Compare commits

...

18 Commits

Author SHA1 Message Date
yuzubot
039a84338a Android 228 2024-02-04 01:00:37 +00:00
yuzubot
f0eb935051 Merge yuzu-emu#12903 2024-02-04 01:00:36 +00:00
yuzubot
db727765f1 Merge yuzu-emu#12892 2024-02-04 01:00:36 +00:00
yuzubot
60dda4ed9a Merge yuzu-emu#12756 2024-02-04 01:00:36 +00:00
yuzubot
dde2c301f3 Merge yuzu-emu#12749 2024-02-04 01:00:36 +00:00
yuzubot
0bf2470bdf Merge yuzu-emu#12461 2024-02-04 01:00:36 +00:00
liamwhite
5da55cbac9 Merge pull request #12901 from Kelebek1/timezone_firmware_fix
Fix firmware timezone boot load check.
2024-02-03 11:10:30 -05:00
liamwhite
81cc4df1f9 Merge pull request #12895 from german77/files
service: fs: Skip non user id folders
2024-02-03 11:10:24 -05:00
liamwhite
25f3d358b1 Merge pull request #12877 from german77/npad-fixed
service: hid: Multiple fixes
2024-02-03 11:10:14 -05:00
liamwhite
a3c8bb251d Merge pull request #12852 from Calinou/multiplayer-color-player-counts
Color player counts in the multiplayer public lobby list
2024-02-03 11:10:00 -05:00
liamwhite
327533be1f Merge pull request #12851 from Calinou/multiplayer-persist-filters
Persist filters in multiplayer public lobby list
2024-02-03 11:09:51 -05:00
liamwhite
61ea2115c7 Merge pull request #12850 from Calinou/multiplayer-add-hotkeys
Add hotkeys for multiplayer actions
2024-02-03 11:09:41 -05:00
Kelebek1
108a72ea8a Fix firmware timezone boot load check. 2024-02-03 15:21:10 +00:00
Narr the Reg
fb3ef957bb service: fs: Skip non user id folders 2024-02-02 13:25:38 -06:00
Narr the Reg
818721d12d service: hid: Multiple fixes 2024-02-01 10:37:44 -06:00
Hugo Locurcio
442aad9b27 Persist filters in multiplayer public lobby list
After connecting to a room, the chosen filter text and the "Games I Own",
"Hide Empty Rooms" and "Hide Full Rooms" values are persisted
to the configuration so they are preserved across restarts.

This makes it easier to rejoin a room if you regularly play the same
game, or after a crash.
2024-01-30 17:40:29 +01:00
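A minimal sketch of how that persistence could be wired with Qt's QSettings, for illustration only; the group and key names ("multiplayer_lobby", "filter_text", and so on) are assumptions, not yuzu's actual configuration keys:

#include <QSettings>
#include <QString>

struct LobbyFilters {
    QString search_text;
    bool games_i_own = false;
    bool hide_empty = false;
    bool hide_full = false;
};

// Write the current filter state as soon as the user connects,
// so it survives restarts and crashes.
void SaveFilters(const LobbyFilters& f) {
    QSettings settings(QStringLiteral("yuzu-emu"), QStringLiteral("yuzu"));
    settings.beginGroup(QStringLiteral("multiplayer_lobby"));
    settings.setValue(QStringLiteral("filter_text"), f.search_text);
    settings.setValue(QStringLiteral("games_i_own"), f.games_i_own);
    settings.setValue(QStringLiteral("hide_empty_rooms"), f.hide_empty);
    settings.setValue(QStringLiteral("hide_full_rooms"), f.hide_full);
    settings.endGroup();
}

// Read the state back when the lobby dialog is constructed.
LobbyFilters LoadFilters() {
    QSettings settings(QStringLiteral("yuzu-emu"), QStringLiteral("yuzu"));
    settings.beginGroup(QStringLiteral("multiplayer_lobby"));
    LobbyFilters f;
    f.search_text = settings.value(QStringLiteral("filter_text")).toString();
    f.games_i_own = settings.value(QStringLiteral("games_i_own"), false).toBool();
    f.hide_empty = settings.value(QStringLiteral("hide_empty_rooms"), false).toBool();
    f.hide_full = settings.value(QStringLiteral("hide_full_rooms"), false).toBool();
    settings.endGroup();
    return f;
}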
Hugo Locurcio
8e0f97ac96 Color player counts in the multiplayer public lobby list
- Full lobbies have their player count displayed in red.
- Lobbies with one slot left have their player count displayed in orange.
- Empty lobbies have their player count grayed out.
2024-01-30 17:38:21 +01:00
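The coloring amounts to a threshold check on the current and maximum player counts. A minimal standalone sketch of that rule; the enum and helper names are illustrative and not part of the actual frontend code:

#include <cstdio>

enum class PlayerCountColor { Normal, GrayedOut, Orange, Red };

// Map a lobby's player count to the color described above.
PlayerCountColor PickPlayerCountColor(int current_players, int max_players) {
    if (current_players == 0) {
        return PlayerCountColor::GrayedOut; // empty lobby
    }
    if (current_players >= max_players) {
        return PlayerCountColor::Red; // full lobby
    }
    if (current_players == max_players - 1) {
        return PlayerCountColor::Orange; // one slot left
    }
    return PlayerCountColor::Normal;
}

int main() {
    // 7 of 8 players -> one slot left -> Orange (prints 2).
    std::printf("%d\n", static_cast<int>(PickPlayerCountColor(7, 8)));
    return 0;
}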
Hugo Locurcio
345d691328 Add hotkeys for multiplayer actions
Default shortcuts were chosen so as to be intuitive (use the first letter
of the action, or the second word's first letter) and work on all
types of keyboards. The hotkeys can be used while playing a game too,
as they are application-wide.
2024-01-30 01:32:14 +01:00
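As a hedged illustration of the "application-wide" part, one way such a hotkey could be registered in a Qt frontend is a QShortcut with an application-level context; the key sequence, function name and lambda body here are assumptions, not yuzu's actual hotkey registry:

#include <QKeySequence>
#include <QMainWindow>
#include <QObject>
#include <QShortcut>

void RegisterMultiplayerHotkey(QMainWindow* window) {
    // Hypothetical binding: Ctrl+B to browse the public lobby list.
    auto* browse = new QShortcut(QKeySequence(QStringLiteral("Ctrl+B")), window);
    // Application-wide context, so the shortcut keeps working while a game is running.
    browse->setContext(Qt::ApplicationShortcut);
    QObject::connect(browse, &QShortcut::activated, window, []() {
        // Open the multiplayer public lobby dialog here.
    });
}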
133 changed files with 4397 additions and 1589 deletions

View File

@@ -1,3 +1,16 @@
| Pull Request | Commit | Title | Author | Merged? |
|----|----|----|----|----|
| [12461](https://github.com/yuzu-emu/yuzu-android//pull/12461) | [`4c08a0e6d`](https://github.com/yuzu-emu/yuzu-android//pull/12461/files) | Rework Nvdec and VIC to fix out-of-order videos, and speed up decoding. | [Kelebek1](https://github.com/Kelebek1/) | Yes |
| [12749](https://github.com/yuzu-emu/yuzu-android//pull/12749) | [`aad4b0d6f`](https://github.com/yuzu-emu/yuzu-android//pull/12749/files) | general: workarounds for SMMU syncing issues | [liamwhite](https://github.com/liamwhite/) | Yes |
| [12756](https://github.com/yuzu-emu/yuzu-android//pull/12756) | [`b19285d4f`](https://github.com/yuzu-emu/yuzu-android//pull/12756/files) | general: applet multiprocess | [liamwhite](https://github.com/liamwhite/) | Yes |
| [12892](https://github.com/yuzu-emu/yuzu-android//pull/12892) | [`78f72b3bf`](https://github.com/yuzu-emu/yuzu-android//pull/12892/files) | cmif_serialization: enforce const for references | [liamwhite](https://github.com/liamwhite/) | Yes |
| [12903](https://github.com/yuzu-emu/yuzu-android//pull/12903) | [`5be8121af`](https://github.com/yuzu-emu/yuzu-android//pull/12903/files) | shader_recompiler: use only ConstOffset for OpImageFetch | [liamwhite](https://github.com/liamwhite/) | Yes |
End of merge log. You can find the original README.md below the break.
-----
<!--
SPDX-FileCopyrightText: 2018 yuzu Emulator Project
SPDX-License-Identifier: GPL-2.0-or-later

View File

@@ -164,6 +164,7 @@ else()
if (MINGW)
add_definitions(-DMINGW_HAS_SECURE_API)
add_compile_options("-msse4.1")
if (MINGW_STATIC_BUILD)
add_definitions(-DQT_STATICPLUGIN)

View File

@@ -9,6 +9,7 @@
#include <string>
#include <vector>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/common_types.h"
namespace Common {
@@ -29,6 +30,8 @@ namespace Common {
template <std::size_t Size, bool le = false>
[[nodiscard]] constexpr std::array<u8, Size> HexStringToArray(std::string_view str) {
ASSERT_MSG(Size * 2 <= str.size(), "Invalid string size");
std::array<u8, Size> out{};
if constexpr (le) {
for (std::size_t i = 2 * Size - 2; i <= 2 * Size; i -= 2) {

View File

@@ -30,6 +30,7 @@ namespace Settings {
#define SETTING(TYPE, RANGED) template class Setting<TYPE, RANGED>
#define SWITCHABLE(TYPE, RANGED) template class SwitchableSetting<TYPE, RANGED>
SETTING(AppletMode, false);
SETTING(AudioEngine, false);
SETTING(bool, false);
SETTING(int, false);
@@ -215,6 +216,8 @@ const char* TranslateCategory(Category category) {
return "Debugging";
case Category::GpuDriver:
return "GpuDriver";
case Category::LibraryApplet:
return "LibraryApplet";
case Category::Miscellaneous:
return "Miscellaneous";
case Category::Network:

View File

@@ -133,6 +133,38 @@ struct TouchFromButtonMap {
struct Values {
Linkage linkage{};
// Applet
Setting<AppletMode> cabinet_applet_mode{linkage, AppletMode::LLE, "cabinet_applet_mode",
Category::LibraryApplet};
Setting<AppletMode> controller_applet_mode{linkage, AppletMode::HLE, "controller_applet_mode",
Category::LibraryApplet};
Setting<AppletMode> data_erase_applet_mode{linkage, AppletMode::HLE, "data_erase_applet_mode",
Category::LibraryApplet};
Setting<AppletMode> error_applet_mode{linkage, AppletMode::HLE, "error_applet_mode",
Category::LibraryApplet};
Setting<AppletMode> net_connect_applet_mode{linkage, AppletMode::HLE, "net_connect_applet_mode",
Category::LibraryApplet};
Setting<AppletMode> player_select_applet_mode{
linkage, AppletMode::HLE, "player_select_applet_mode", Category::LibraryApplet};
Setting<AppletMode> swkbd_applet_mode{linkage, AppletMode::LLE, "swkbd_applet_mode",
Category::LibraryApplet};
Setting<AppletMode> mii_edit_applet_mode{linkage, AppletMode::LLE, "mii_edit_applet_mode",
Category::LibraryApplet};
Setting<AppletMode> web_applet_mode{linkage, AppletMode::HLE, "web_applet_mode",
Category::LibraryApplet};
Setting<AppletMode> shop_applet_mode{linkage, AppletMode::HLE, "shop_applet_mode",
Category::LibraryApplet};
Setting<AppletMode> photo_viewer_applet_mode{
linkage, AppletMode::LLE, "photo_viewer_applet_mode", Category::LibraryApplet};
Setting<AppletMode> offline_web_applet_mode{linkage, AppletMode::LLE, "offline_web_applet_mode",
Category::LibraryApplet};
Setting<AppletMode> login_share_applet_mode{linkage, AppletMode::HLE, "login_share_applet_mode",
Category::LibraryApplet};
Setting<AppletMode> wifi_web_auth_applet_mode{
linkage, AppletMode::HLE, "wifi_web_auth_applet_mode", Category::LibraryApplet};
Setting<AppletMode> my_page_applet_mode{linkage, AppletMode::LLE, "my_page_applet_mode",
Category::LibraryApplet};
// Audio
SwitchableSetting<AudioEngine> sink_id{linkage, AudioEngine::Auto, "output_engine",
Category::Audio, Specialization::RuntimeList};

View File

@@ -44,6 +44,7 @@ enum class Category : u32 {
Services,
Paths,
Linux,
LibraryApplet,
MaxEnum,
};

View File

@@ -151,6 +151,8 @@ ENUM(AspectRatio, R16_9, R4_3, R21_9, R16_10, Stretch);
ENUM(ConsoleMode, Handheld, Docked);
ENUM(AppletMode, HLE, LLE);
template <typename Type>
inline std::string CanonicalizeEnum(Type id) {
const auto group = EnumMetadata<Type>::Canonicalizations();

View File

@@ -43,6 +43,8 @@ public:
DeviceMemoryManager(const DeviceMemory& device_memory);
~DeviceMemoryManager();
static constexpr bool HAS_FLUSH_INVALIDATION = true;
void BindInterface(DeviceInterface* device_inter);
DAddr Allocate(size_t size);

View File

@@ -44,15 +44,32 @@ public:
GuestMemory() = delete;
explicit GuestMemory(M& memory, u64 addr, std::size_t size,
Common::ScratchBuffer<T>* backup = nullptr)
: m_memory{memory}, m_addr{addr}, m_size{size} {
: m_memory{&memory}, m_addr{addr}, m_size{size} {
static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
if constexpr (FLAGS & GuestMemoryFlags::Read) {
if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
if (!this->TrySetSpan()) {
if (backup) {
backup->resize_destructive(this->size());
m_data_span = *backup;
m_span_valid = true;
m_is_data_copy = true;
} else {
m_data_copy.resize(this->size());
m_data_span = std::span(m_data_copy);
m_span_valid = true;
m_is_data_copy = true;
}
}
} else if constexpr (FLAGS & GuestMemoryFlags::Read) {
Read(addr, size, backup);
}
}
~GuestMemory() = default;
GuestMemory(GuestMemory&& rhs) = default;
GuestMemory& operator=(GuestMemory&& rhs) = default;
T* data() noexcept {
return m_data_span.data();
}
@@ -109,8 +126,8 @@ public:
}
if (this->TrySetSpan()) {
if constexpr (FLAGS & GuestMemoryFlags::Safe) {
m_memory.FlushRegion(m_addr, this->size_bytes());
if constexpr (FLAGS & GuestMemoryFlags::Safe && M::HAS_FLUSH_INVALIDATION) {
m_memory->FlushRegion(m_addr, this->size_bytes());
}
} else {
if (backup) {
@@ -123,9 +140,9 @@ public:
m_is_data_copy = true;
m_span_valid = true;
if constexpr (FLAGS & GuestMemoryFlags::Safe) {
m_memory.ReadBlock(m_addr, this->data(), this->size_bytes());
m_memory->ReadBlock(m_addr, this->data(), this->size_bytes());
} else {
m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
m_memory->ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
}
}
return m_data_span;
@@ -133,18 +150,19 @@ public:
void Write(std::span<T> write_data) noexcept {
if constexpr (FLAGS & GuestMemoryFlags::Cached) {
m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
m_memory->WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
} else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes());
m_memory->WriteBlock(m_addr, write_data.data(), this->size_bytes());
} else {
m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
m_memory->WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
}
}
bool TrySetSpan() noexcept {
if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) {
if (u8* ptr = m_memory->GetSpan(m_addr, this->size_bytes()); ptr) {
m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
m_span_valid = true;
m_is_data_copy = false;
return true;
}
return false;
@@ -159,7 +177,7 @@ protected:
return m_addr_changed;
}
M& m_memory;
M* m_memory;
u64 m_addr{};
size_t m_size{};
std::span<T> m_data_span{};
@@ -175,17 +193,7 @@ public:
GuestMemoryScoped() = delete;
explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
Common::ScratchBuffer<T>* backup = nullptr)
: GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {
if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
if (!this->TrySetSpan()) {
if (backup) {
this->m_data_span = *backup;
this->m_span_valid = true;
this->m_is_data_copy = true;
}
}
}
}
: GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {}
~GuestMemoryScoped() {
if constexpr (FLAGS & GuestMemoryFlags::Write) {
@@ -196,15 +204,17 @@ public:
if (this->AddressChanged() || this->IsDataCopy()) {
ASSERT(this->m_span_valid);
if constexpr (FLAGS & GuestMemoryFlags::Cached) {
this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes());
this->m_memory->WriteBlockCached(this->m_addr, this->data(),
this->size_bytes());
} else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes());
this->m_memory->WriteBlock(this->m_addr, this->data(), this->size_bytes());
} else {
this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes());
this->m_memory->WriteBlockUnsafe(this->m_addr, this->data(),
this->size_bytes());
}
} else if constexpr ((FLAGS & GuestMemoryFlags::Safe) ||
(FLAGS & GuestMemoryFlags::Cached)) {
this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes());
this->m_memory->InvalidateRegion(this->m_addr, this->size_bytes());
}
}
}

View File

@@ -4,8 +4,9 @@
#include <random>
#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/arm/dynarmic/arm_dynarmic.h"
#include "core/arm/dynarmic/dynarmic_exclusive_monitor.h"
#include "core/core.h"
#include "core/gpu_dirty_memory_manager.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/kernel/k_scoped_resource_reservation.h"
#include "core/hle/kernel/k_shared_memory.h"
@@ -1258,6 +1259,10 @@ void KProcess::InitializeInterfaces() {
#ifdef HAS_NCE
if (this->IsApplication() && Settings::IsNceEnabled()) {
// Register the scoped JIT handler before creating any NCE instances
// so that its signal handler will appear first in the signal chain.
Core::ScopedJitExecution::RegisterHandler();
for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
m_arm_interfaces[i] = std::make_unique<Core::ArmNce>(m_kernel.System(), true, i);
}

View File

@@ -130,9 +130,9 @@ enum class AppletProgramId : u64 {
enum class LibraryAppletMode : u32 {
AllForeground = 0,
Background = 1,
NoUI = 2,
BackgroundIndirectDisplay = 3,
PartialForeground = 1,
NoUi = 2,
PartialForegroundIndirectDisplay = 3,
AllForegroundInitiallyHidden = 4,
};

View File

@@ -68,9 +68,9 @@ void SoftwareKeyboard::Initialize() {
case LibraryAppletMode::AllForeground:
InitializeForeground();
break;
case LibraryAppletMode::Background:
case LibraryAppletMode::BackgroundIndirectDisplay:
InitializeBackground(applet_mode);
case LibraryAppletMode::PartialForeground:
case LibraryAppletMode::PartialForegroundIndirectDisplay:
InitializePartialForeground(applet_mode);
break;
default:
ASSERT_MSG(false, "Invalid LibraryAppletMode={}", applet_mode);
@@ -243,7 +243,7 @@ void SoftwareKeyboard::InitializeForeground() {
InitializeFrontendNormalKeyboard();
}
void SoftwareKeyboard::InitializeBackground(LibraryAppletMode library_applet_mode) {
void SoftwareKeyboard::InitializePartialForeground(LibraryAppletMode library_applet_mode) {
LOG_INFO(Service_AM, "Initializing Inline Software Keyboard Applet.");
is_background = true;
@@ -258,9 +258,9 @@ void SoftwareKeyboard::InitializeBackground(LibraryAppletMode library_applet_mod
swkbd_inline_initialize_arg.size());
if (swkbd_initialize_arg.library_applet_mode_flag) {
ASSERT(library_applet_mode == LibraryAppletMode::Background);
ASSERT(library_applet_mode == LibraryAppletMode::PartialForeground);
} else {
ASSERT(library_applet_mode == LibraryAppletMode::BackgroundIndirectDisplay);
ASSERT(library_applet_mode == LibraryAppletMode::PartialForegroundIndirectDisplay);
}
}

View File

@@ -62,7 +62,7 @@ private:
void InitializeForeground();
/// Initializes the inline software keyboard.
void InitializeBackground(LibraryAppletMode library_applet_mode);
void InitializePartialForeground(LibraryAppletMode library_applet_mode);
/// Processes the text check sent by the application.
void ProcessTextCheck();

View File

@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/settings.h"
#include "core/hle/kernel/k_transfer_memory.h"
#include "core/hle/service/am/applet_data_broker.h"
#include "core/hle/service/am/applet_manager.h"
@@ -16,6 +17,34 @@ namespace Service::AM {
namespace {
bool ShouldCreateGuestApplet(AppletId applet_id) {
#define X(Name, name) \
if (applet_id == AppletId::Name && \
Settings::values.name##_applet_mode.GetValue() != Settings::AppletMode::LLE) { \
return false; \
}
X(Cabinet, cabinet)
X(Controller, controller)
X(DataErase, data_erase)
X(Error, error)
X(NetConnect, net_connect)
X(ProfileSelect, player_select)
X(SoftwareKeyboard, swkbd)
X(MiiEdit, mii_edit)
X(Web, web)
X(Shop, shop)
X(PhotoViewer, photo_viewer)
X(OfflineWeb, offline_web)
X(LoginShare, login_share)
X(WebAuth, wifi_web_auth)
X(MyPage, my_page)
#undef X
return true;
}
AppletProgramId AppletIdToProgramId(AppletId applet_id) {
switch (applet_id) {
case AppletId::OverlayDisplay:
@@ -63,9 +92,10 @@ AppletProgramId AppletIdToProgramId(AppletId applet_id) {
}
}
[[maybe_unused]] std::shared_ptr<ILibraryAppletAccessor> CreateGuestApplet(
Core::System& system, std::shared_ptr<Applet> caller_applet, AppletId applet_id,
LibraryAppletMode mode) {
std::shared_ptr<ILibraryAppletAccessor> CreateGuestApplet(Core::System& system,
std::shared_ptr<Applet> caller_applet,
AppletId applet_id,
LibraryAppletMode mode) {
const auto program_id = static_cast<u64>(AppletIdToProgramId(applet_id));
if (program_id == 0) {
// Unknown applet
@@ -87,7 +117,7 @@ AppletProgramId AppletIdToProgramId(AppletId applet_id) {
// Set focus state
switch (mode) {
case LibraryAppletMode::AllForeground:
case LibraryAppletMode::NoUI:
case LibraryAppletMode::NoUi:
applet->focus_state = FocusState::InFocus;
applet->hid_registration.EnableAppletToGetInput(true);
applet->message_queue.PushMessage(AppletMessageQueue::AppletMessage::ChangeIntoForeground);
@@ -99,8 +129,8 @@ AppletProgramId AppletIdToProgramId(AppletId applet_id) {
applet->hid_registration.EnableAppletToGetInput(false);
applet->message_queue.PushMessage(AppletMessageQueue::AppletMessage::FocusStateChanged);
break;
case LibraryAppletMode::Background:
case LibraryAppletMode::BackgroundIndirectDisplay:
case LibraryAppletMode::PartialForeground:
case LibraryAppletMode::PartialForegroundIndirectDisplay:
default:
applet->focus_state = FocusState::Background;
applet->hid_registration.EnableAppletToGetInput(true);
@@ -117,9 +147,10 @@ AppletProgramId AppletIdToProgramId(AppletId applet_id) {
return std::make_shared<ILibraryAppletAccessor>(system, broker, applet);
}
[[maybe_unused]] std::shared_ptr<ILibraryAppletAccessor> CreateFrontendApplet(
Core::System& system, std::shared_ptr<Applet> caller_applet, AppletId applet_id,
LibraryAppletMode mode) {
std::shared_ptr<ILibraryAppletAccessor> CreateFrontendApplet(Core::System& system,
std::shared_ptr<Applet> caller_applet,
AppletId applet_id,
LibraryAppletMode mode) {
const auto program_id = static_cast<u64>(AppletIdToProgramId(applet_id));
auto process = std::make_unique<Process>(system);
@@ -163,7 +194,13 @@ void ILibraryAppletCreator::CreateLibraryApplet(HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called with applet_id={:08X}, applet_mode={:08X}", applet_id,
applet_mode);
auto library_applet = CreateFrontendApplet(system, applet, applet_id, applet_mode);
std::shared_ptr<ILibraryAppletAccessor> library_applet;
if (ShouldCreateGuestApplet(applet_id)) {
library_applet = CreateGuestApplet(system, applet, applet_id, applet_mode);
}
if (!library_applet) {
library_applet = CreateFrontendApplet(system, applet, applet_id, applet_mode);
}
if (!library_applet) {
LOG_ERROR(Service_AM, "Applet doesn't exist! applet_id={}", applet_id);

View File

@@ -1,10 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/logging/log.h"
#include "core/hle/result.h"
#include "core/hle/service/am/am_results.h"
#include "core/hle/service/am/frontend/applets.h"
#include "core/hle/service/am/self_controller.h"
#include "core/hle/service/caps/caps_su.h"
#include "core/hle/service/hle_ipc.h"
#include "core/hle/service/ipc_helpers.h"
#include "core/hle/service/nvnflinger/fb_share_buffer_manager.h"
#include "core/hle/service/nvnflinger/nvnflinger.h"
@@ -47,7 +50,7 @@ ISelfController::ISelfController(Core::System& system_, std::shared_ptr<Applet>
{50, &ISelfController::SetHandlesRequestToDisplay, "SetHandlesRequestToDisplay"},
{51, &ISelfController::ApproveToDisplay, "ApproveToDisplay"},
{60, nullptr, "OverrideAutoSleepTimeAndDimmingTime"},
{61, nullptr, "SetMediaPlaybackState"},
{61, &ISelfController::SetMediaPlaybackState, "SetMediaPlaybackState"},
{62, &ISelfController::SetIdleTimeDetectionExtension, "SetIdleTimeDetectionExtension"},
{63, &ISelfController::GetIdleTimeDetectionExtension, "GetIdleTimeDetectionExtension"},
{64, nullptr, "SetInputDetectionSourceSet"},
@@ -288,7 +291,8 @@ void ISelfController::GetSystemSharedBufferHandle(HLERequestContext& ctx) {
}
Result ISelfController::EnsureBufferSharingEnabled(Kernel::KProcess* process) {
if (applet->system_buffer_manager.Initialize(&nvnflinger, process, applet->applet_id)) {
if (applet->system_buffer_manager.Initialize(&nvnflinger, process, applet->applet_id,
applet->library_applet_mode)) {
return ResultSuccess;
}
@@ -323,6 +327,16 @@ void ISelfController::ApproveToDisplay(HLERequestContext& ctx) {
rb.Push(ResultSuccess);
}
void ISelfController::SetMediaPlaybackState(HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const u8 state = rp.Pop<u8>();
LOG_WARNING(Service_AM, "(STUBBED) called, state={}", state);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
}
void ISelfController::SetIdleTimeDetectionExtension(HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};

View File

@@ -3,6 +3,7 @@
#pragma once
#include "core/hle/service/hle_ipc.h"
#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/service.h"
@@ -38,6 +39,7 @@ private:
void CreateManagedDisplaySeparableLayer(HLERequestContext& ctx);
void SetHandlesRequestToDisplay(HLERequestContext& ctx);
void ApproveToDisplay(HLERequestContext& ctx);
void SetMediaPlaybackState(HLERequestContext& ctx);
void SetIdleTimeDetectionExtension(HLERequestContext& ctx);
void GetIdleTimeDetectionExtension(HLERequestContext& ctx);
void ReportUserIsActive(HLERequestContext& ctx);

View File

@@ -17,11 +17,12 @@ SystemBufferManager::~SystemBufferManager() {
// Clean up shared layers.
if (m_buffer_sharing_enabled) {
m_nvnflinger->GetSystemBufferManager().Finalize(m_process);
}
}
bool SystemBufferManager::Initialize(Nvnflinger::Nvnflinger* nvnflinger, Kernel::KProcess* process,
AppletId applet_id) {
AppletId applet_id, LibraryAppletMode mode) {
if (m_nvnflinger) {
return m_buffer_sharing_enabled;
}
@@ -36,9 +37,14 @@ bool SystemBufferManager::Initialize(Nvnflinger::Nvnflinger* nvnflinger, Kernel:
return false;
}
Nvnflinger::LayerBlending blending = Nvnflinger::LayerBlending::None;
if (mode == LibraryAppletMode::PartialForeground) {
blending = Nvnflinger::LayerBlending::Coverage;
}
const auto display_id = m_nvnflinger->OpenDisplay("Default").value();
const auto res = m_nvnflinger->GetSystemBufferManager().Initialize(
&m_system_shared_buffer_id, &m_system_shared_layer_id, display_id);
m_process, &m_system_shared_buffer_id, &m_system_shared_layer_id, display_id, blending);
if (res.IsSuccess()) {
m_buffer_sharing_enabled = true;
@@ -62,8 +68,12 @@ void SystemBufferManager::SetWindowVisibility(bool visible) {
Result SystemBufferManager::WriteAppletCaptureBuffer(bool* out_was_written,
s32* out_fbshare_layer_index) {
// TODO
R_SUCCEED();
if (!m_buffer_sharing_enabled) {
return VI::ResultPermissionDenied;
}
return m_nvnflinger->GetSystemBufferManager().WriteAppletCaptureBuffer(out_was_written,
out_fbshare_layer_index);
}
} // namespace Service::AM

View File

@@ -27,7 +27,8 @@ public:
SystemBufferManager();
~SystemBufferManager();
bool Initialize(Nvnflinger::Nvnflinger* flinger, Kernel::KProcess* process, AppletId applet_id);
bool Initialize(Nvnflinger::Nvnflinger* flinger, Kernel::KProcess* process, AppletId applet_id,
LibraryAppletMode mode);
void GetSystemSharedLayerHandle(u64* out_system_shared_buffer_id,
u64* out_system_shared_layer_id) {

View File

@@ -115,6 +115,11 @@ struct ArgumentTraits {
static constexpr ArgumentType Type = ArgumentType::InData;
};
template <typename... Ts>
consteval bool ConstIfReference() {
return ((!std::is_reference_v<Ts> || std::is_const_v<std::remove_reference_t<Ts>>) && ... && true);
}
struct RequestLayout {
u32 copy_handle_count;
u32 move_handle_count;
@@ -435,6 +440,7 @@ void CmifReplyWrapImpl(HLERequestContext& ctx, T& t, Result (T::*f)(A...)) {
}
const bool is_domain = Domain ? ctx.GetManager()->IsDomain() : false;
static_assert(ConstIfReference<A...>(), "Arguments taken by reference must be const");
using MethodArguments = std::tuple<std::remove_cvref_t<A>...>;
OutTemporaryBuffers buffers{};

View File

@@ -4,10 +4,9 @@
#pragma once
#include <memory>
#include <span>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "core/hle/service/hle_ipc.h"
namespace Service {
@@ -22,8 +21,10 @@ class Out {
public:
using Type = T;
/* implicit */ Out(const Out& t) : raw(t.raw) {}
/* implicit */ Out(AutoOut<Type>& t) : raw(&t.raw) {}
/* implicit */ Out(Type* t) : raw(t) {}
Out& operator=(const Out&) = delete;
Type* Get() const {
return raw;
@@ -37,6 +38,10 @@ public:
return raw;
}
operator Type*() const {
return raw;
}
private:
Type* raw;
};
@@ -113,8 +118,10 @@ class OutCopyHandle {
public:
using Type = T*;
/* implicit */ OutCopyHandle(const OutCopyHandle& t) : raw(t.raw) {}
/* implicit */ OutCopyHandle(AutoOut<Type>& t) : raw(&t.raw) {}
/* implicit */ OutCopyHandle(Type* t) : raw(t) {}
OutCopyHandle& operator=(const OutCopyHandle&) = delete;
Type* Get() const {
return raw;
@@ -128,6 +135,10 @@ public:
return raw;
}
operator Type*() const {
return raw;
}
private:
Type* raw;
};
@@ -137,8 +148,10 @@ class OutMoveHandle {
public:
using Type = T*;
/* implicit */ OutMoveHandle(const OutMoveHandle& t) : raw(t.raw) {}
/* implicit */ OutMoveHandle(AutoOut<Type>& t) : raw(&t.raw) {}
/* implicit */ OutMoveHandle(Type* t) : raw(t) {}
OutMoveHandle& operator=(const OutMoveHandle&) = delete;
Type* Get() const {
return raw;
@@ -152,6 +165,10 @@ public:
return raw;
}
operator Type*() const {
return raw;
}
private:
Type* raw;
};
@@ -248,8 +265,10 @@ public:
static constexpr BufferAttr Attr = static_cast<BufferAttr>(A | BufferAttr_In | BufferAttr_FixedSize);
using Type = T;
/* implicit */ OutLargeData(const OutLargeData& t) : raw(t.raw) {}
/* implicit */ OutLargeData(Type* t) : raw(t) {}
/* implicit */ OutLargeData(AutoOut<T>& t) : raw(&t.raw) {}
OutLargeData& operator=(const OutLargeData&) = delete;
Type* Get() const {
return raw;
@@ -263,6 +282,10 @@ public:
return raw;
}
operator Type*() const {
return raw;
}
private:
Type* raw;
};

View File

@@ -115,6 +115,11 @@ private:
if (type->GetName() == "save") {
for (const auto& save_id : type->GetSubdirectories()) {
for (const auto& user_id : save_id->GetSubdirectories()) {
// Skip non user id subdirectories
if (user_id->GetName().size() != 0x20) {
continue;
}
const auto save_id_numeric = stoull_be(save_id->GetName());
auto user_id_numeric = Common::HexStringToArray<0x10>(user_id->GetName());
std::reverse(user_id_numeric.begin(), user_id_numeric.end());
@@ -160,6 +165,10 @@ private:
} else if (space == FileSys::SaveDataSpaceId::TemporaryStorage) {
// Temporary Storage
for (const auto& user_id : type->GetSubdirectories()) {
// Skip non user id subdirectories
if (user_id->GetName().size() != 0x20) {
continue;
}
for (const auto& title_id : user_id->GetSubdirectories()) {
if (!title_id->GetFiles().empty() ||
!title_id->GetSubdirectories().empty()) {

View File

@@ -65,6 +65,7 @@ Result MountTimeZoneBinary(Core::System& system) {
// Validate that the romfs is readable; using invalid firmware keys can cause this to get
// set even though the files are garbage. In that case, we want to hit the next path and
// synthesise them instead.
g_time_zone_binary_mount_result = ResultSuccess;
Service::PSC::Time::LocationName name{"Etc/GMT"};
if (!IsTimeZoneBinaryValid(name)) {
ResetTimeZoneBinary();

View File

@@ -49,6 +49,7 @@ SessionId Container::OpenSession(Kernel::KProcess* process) {
continue;
}
if (session.process == process) {
session.ref_count++;
return session.id;
}
}
@@ -66,6 +67,7 @@ SessionId Container::OpenSession(Kernel::KProcess* process) {
}
auto& session = impl->sessions[new_id];
session.is_active = true;
session.ref_count = 1;
// Optimization
if (process->IsApplication()) {
auto& page_table = process->GetPageTable().GetBasePageTable();
@@ -114,8 +116,11 @@ SessionId Container::OpenSession(Kernel::KProcess* process) {
void Container::CloseSession(SessionId session_id) {
std::scoped_lock lk(impl->session_guard);
impl->file.UnmapAllHandles(session_id);
auto& session = impl->sessions[session_id.id];
if (--session.ref_count > 0) {
return;
}
impl->file.UnmapAllHandles(session_id);
auto& smmu = impl->host1x.MemoryManager();
if (session.has_preallocated_area) {
const DAddr region_start = session.mapper->GetRegionStart();

View File

@@ -46,6 +46,7 @@ struct Session {
bool has_preallocated_area{};
std::unique_ptr<HeapMapper> mapper{};
bool is_active{};
s32 ref_count{};
};
class Container {
@@ -67,10 +68,7 @@ public:
const SyncpointManager& GetSyncpointManager() const;
struct Host1xDeviceFileData {
std::unordered_map<DeviceFD, u32> fd_to_id{};
std::deque<u32> syncpts_accumulated{};
u32 nvdec_next_id{};
u32 vic_next_id{};
};
Host1xDeviceFileData& Host1xDeviceFile();

View File

@@ -333,9 +333,13 @@ void NvMap::UnmapAllHandles(NvCore::SessionId session_id) {
}();
for (auto& [id, handle] : handles_copy) {
if (handle->session_id.id == session_id.id) {
FreeHandle(id, false);
{
std::scoped_lock lk{handle->mutex};
if (handle->session_id.id != session_id.id || handle->dupes <= 0) {
continue;
}
}
FreeHandle(id, false);
}
}

View File

@@ -15,6 +15,22 @@
namespace Service::Nvidia::Devices {
namespace {
Tegra::BlendMode ConvertBlending(Service::Nvnflinger::LayerBlending blending) {
switch (blending) {
case Service::Nvnflinger::LayerBlending::None:
default:
return Tegra::BlendMode::Opaque;
case Service::Nvnflinger::LayerBlending::Premultiplied:
return Tegra::BlendMode::Premultiplied;
case Service::Nvnflinger::LayerBlending::Coverage:
return Tegra::BlendMode::Coverage;
}
}
} // namespace
nvdisp_disp0::nvdisp_disp0(Core::System& system_, NvCore::Container& core)
: nvdevice{system_}, container{core}, nvmap{core.GetNvMapFile()} {}
nvdisp_disp0::~nvdisp_disp0() = default;
@@ -56,6 +72,7 @@ void nvdisp_disp0::Composite(std::span<const Nvnflinger::HwcLayer> sorted_layers
.pixel_format = layer.format,
.transform_flags = layer.transform,
.crop_rect = layer.crop_rect,
.blending = ConvertBlending(layer.blending),
});
for (size_t i = 0; i < layer.acquire_fence.num_fences; i++) {

View File

@@ -8,6 +8,7 @@
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/devices/ioctl_serialization.h"
#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
#include "video_core/host1x/host1x.h"
#include "video_core/renderer_base.h"
namespace Service::Nvidia::Devices {
@@ -21,13 +22,8 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
switch (command.group) {
case 0x0:
switch (command.cmd) {
case 0x1: {
auto& host1x_file = core.Host1xDeviceFile();
if (!host1x_file.fd_to_id.contains(fd)) {
host1x_file.fd_to_id[fd] = host1x_file.nvdec_next_id++;
}
case 0x1:
return WrapFixedVariable(this, &nvhost_nvdec::Submit, input, output, fd);
}
case 0x2:
return WrapFixed(this, &nvhost_nvdec::GetSyncpoint, input, output);
case 0x3:
@@ -72,15 +68,12 @@ void nvhost_nvdec::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
LOG_INFO(Service_NVDRV, "NVDEC video stream started");
system.SetNVDECActive(true);
sessions[fd] = session_id;
host1x.StartDevice(fd, Tegra::Host1x::ChannelType::NvDec, channel_syncpoint);
}
void nvhost_nvdec::OnClose(DeviceFD fd) {
LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
auto& host1x_file = core.Host1xDeviceFile();
const auto iter = host1x_file.fd_to_id.find(fd);
if (iter != host1x_file.fd_to_id.end()) {
system.GPU().ClearCdmaInstance(iter->second);
}
host1x.StopDevice(fd, Tegra::Host1x::ChannelType::NvDec);
system.SetNVDECActive(false);
auto it = sessions.find(fd);
if (it != sessions.end()) {

View File

@@ -55,8 +55,9 @@ std::size_t WriteVectors(std::span<u8> dst, const std::vector<T>& src, std::size
nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Container& core_,
NvCore::ChannelType channel_type_)
: nvdevice{system_}, core{core_}, syncpoint_manager{core.GetSyncpointManager()},
nvmap{core.GetNvMapFile()}, channel_type{channel_type_} {
: nvdevice{system_}, host1x{system_.Host1x()}, core{core_},
syncpoint_manager{core.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
channel_type{channel_type_} {
auto& syncpts_accumulated = core.Host1xDeviceFile().syncpts_accumulated;
if (syncpts_accumulated.empty()) {
channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false);
@@ -95,24 +96,24 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
offset += SliceVectors(data, syncpt_increments, params.syncpoint_count, offset);
offset += SliceVectors(data, fence_thresholds, params.fence_count, offset);
auto& gpu = system.GPU();
auto* session = core.GetSession(sessions[fd]);
if (gpu.UseNvdec()) {
for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
const SyncptIncr& syncpt_incr = syncpt_increments[i];
fence_thresholds[i] =
syncpoint_manager.IncrementSyncpointMaxExt(syncpt_incr.id, syncpt_incr.increments);
}
for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
const SyncptIncr& syncpt_incr = syncpt_increments[i];
fence_thresholds[i] =
syncpoint_manager.IncrementSyncpointMaxExt(syncpt_incr.id, syncpt_incr.increments);
}
for (const auto& cmd_buffer : command_buffers) {
const auto object = nvmap.GetHandle(cmd_buffer.memory_id);
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
cmdlist.size() * sizeof(u32));
gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
Core::Memory::CpuGuestMemory<Tegra::ChCommandHeader,
Core::Memory::GuestMemoryFlags::SafeRead>
cmdlist(session->process->GetMemory(), object->address + cmd_buffer.offset,
cmd_buffer.word_count);
host1x.PushEntries(fd, std::move(cmdlist));
}
// Some games expect command_buffers to be written back
offset = 0;
offset += WriteVectors(data, command_buffers, offset);

View File

@@ -119,6 +119,7 @@ protected:
Kernel::KEvent* QueryEvent(u32 event_id) override;
Tegra::Host1x::Host1x& host1x;
u32 channel_syncpoint;
s32_le nvmap_fd{};
u32_le submit_timeout{};

View File

@@ -7,6 +7,7 @@
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/devices/ioctl_serialization.h"
#include "core/hle/service/nvdrv/devices/nvhost_vic.h"
#include "video_core/host1x/host1x.h"
#include "video_core/renderer_base.h"
namespace Service::Nvidia::Devices {
@@ -21,13 +22,8 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
switch (command.group) {
case 0x0:
switch (command.cmd) {
case 0x1: {
auto& host1x_file = core.Host1xDeviceFile();
if (!host1x_file.fd_to_id.contains(fd)) {
host1x_file.fd_to_id[fd] = host1x_file.vic_next_id++;
}
case 0x1:
return WrapFixedVariable(this, &nvhost_vic::Submit, input, output, fd);
}
case 0x2:
return WrapFixed(this, &nvhost_vic::GetSyncpoint, input, output);
case 0x3:
@@ -70,14 +66,11 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu
void nvhost_vic::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
sessions[fd] = session_id;
host1x.StartDevice(fd, Tegra::Host1x::ChannelType::VIC, channel_syncpoint);
}
void nvhost_vic::OnClose(DeviceFD fd) {
auto& host1x_file = core.Host1xDeviceFile();
const auto iter = host1x_file.fd_to_id.find(fd);
if (iter != host1x_file.fd_to_id.end()) {
system.GPU().ClearCdmaInstance(iter->second);
}
host1x.StopDevice(fd, Tegra::Host1x::ChannelType::VIC);
sessions.erase(fd);
}

View File

@@ -14,24 +14,20 @@
#include "core/hle/service/nvnflinger/ui/graphic_buffer.h"
#include "core/hle/service/vi/layer/vi_layer.h"
#include "core/hle/service/vi/vi_results.h"
#include "video_core/gpu.h"
#include "video_core/host1x/host1x.h"
namespace Service::Nvnflinger {
namespace {
Result AllocateIoForProcessAddressSpace(Common::ProcessAddress* out_map_address,
std::unique_ptr<Kernel::KPageGroup>* out_page_group,
Core::System& system, u32 size) {
Result AllocateSharedBufferMemory(std::unique_ptr<Kernel::KPageGroup>* out_page_group,
Core::System& system, u32 size) {
using Core::Memory::YUZU_PAGESIZE;
// Allocate memory for the system shared buffer.
// FIXME: Because the gmmu can only point to cpu addresses, we need
// to map this in the application space to allow it to be used.
// FIXME: Add proper smmu emulation.
// FIXME: This memory belongs to vi's .data section.
auto& kernel = system.Kernel();
auto* process = system.ApplicationProcess();
auto& page_table = process->GetPageTable();
// Hold a temporary page group reference while we try to map it.
auto pg = std::make_unique<Kernel::KPageGroup>(
@@ -43,6 +39,30 @@ Result AllocateIoForProcessAddressSpace(Common::ProcessAddress* out_map_address,
Kernel::KMemoryManager::EncodeOption(Kernel::KMemoryManager::Pool::Secure,
Kernel::KMemoryManager::Direction::FromBack)));
// Fill the output data with red.
for (auto& block : *pg) {
u32* start = system.DeviceMemory().GetPointer<u32>(block.GetAddress());
u32* end = system.DeviceMemory().GetPointer<u32>(block.GetAddress() + block.GetSize());
for (; start < end; start++) {
*start = 0xFF0000FF;
}
}
// Return the mapped page group.
*out_page_group = std::move(pg);
// We succeeded.
R_SUCCEED();
}
Result MapSharedBufferIntoProcessAddressSpace(Common::ProcessAddress* out_map_address,
std::unique_ptr<Kernel::KPageGroup>& pg,
Kernel::KProcess* process, Core::System& system) {
using Core::Memory::YUZU_PAGESIZE;
auto& page_table = process->GetPageTable();
// Get bounds of where mapping is possible.
const VAddr alias_code_begin = GetInteger(page_table.GetAliasCodeRegionStart());
const VAddr alias_code_size = page_table.GetAliasCodeRegionSize() / YUZU_PAGESIZE;
@@ -64,9 +84,6 @@ Result AllocateIoForProcessAddressSpace(Common::ProcessAddress* out_map_address,
// Return failure, if necessary
R_UNLESS(i < 64, res);
// Return the mapped page group.
*out_page_group = std::move(pg);
// We succeeded.
R_SUCCEED();
}
@@ -135,6 +152,13 @@ Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv, Nvidia::D
R_RETURN(AllocNvMapHandle(*nvmap, *out_handle, buffer, size, nvmap_fd));
}
void FreeHandle(u32 handle, Nvidia::Module& nvdrv, Nvidia::DeviceFD nvmap_fd) {
auto nvmap = nvdrv.GetDevice<Nvidia::Devices::nvmap>(nvmap_fd);
ASSERT(nvmap != nullptr);
R_ASSERT(FreeNvMapHandle(*nvmap, handle, nvmap_fd));
}
constexpr auto SharedBufferBlockLinearFormat = android::PixelFormat::Rgba8888;
constexpr u32 SharedBufferBlockLinearBpp = 4;
@@ -186,53 +210,97 @@ FbShareBufferManager::FbShareBufferManager(Core::System& system, Nvnflinger& fli
FbShareBufferManager::~FbShareBufferManager() = default;
Result FbShareBufferManager::Initialize(u64* out_buffer_id, u64* out_layer_id, u64 display_id) {
Result FbShareBufferManager::Initialize(Kernel::KProcess* owner_process, u64* out_buffer_id,
u64* out_layer_handle, u64 display_id,
LayerBlending blending) {
std::scoped_lock lk{m_guard};
// Ensure we have not already created a buffer.
R_UNLESS(m_buffer_id == 0, VI::ResultOperationFailed);
// Ensure we haven't already created.
const u64 aruid = owner_process->GetProcessId();
R_UNLESS(!m_sessions.contains(aruid), VI::ResultPermissionDenied);
// Allocate memory and space for the shared buffer.
Common::ProcessAddress map_address;
R_TRY(AllocateIoForProcessAddressSpace(std::addressof(map_address),
std::addressof(m_buffer_page_group), m_system,
SharedBufferSize));
// Allocate memory for the shared buffer if needed.
if (!m_buffer_page_group) {
R_TRY(AllocateSharedBufferMemory(std::addressof(m_buffer_page_group), m_system,
SharedBufferSize));
// Record buffer id.
m_buffer_id = m_next_buffer_id++;
// Record display id.
m_display_id = display_id;
}
// Map into process.
Common::ProcessAddress map_address{};
R_TRY(MapSharedBufferIntoProcessAddressSpace(std::addressof(map_address), m_buffer_page_group,
owner_process, m_system));
// Create new session.
auto [it, was_emplaced] = m_sessions.emplace(aruid, FbShareSession{});
auto& session = it->second;
auto& container = m_nvdrv->GetContainer();
m_session_id = container.OpenSession(m_system.ApplicationProcess());
m_nvmap_fd = m_nvdrv->Open("/dev/nvmap", m_session_id);
session.session_id = container.OpenSession(owner_process);
session.nvmap_fd = m_nvdrv->Open("/dev/nvmap", session.session_id);
// Create an nvmap handle for the buffer and assign the memory to it.
R_TRY(AllocateHandleForBuffer(std::addressof(m_buffer_nvmap_handle), *m_nvdrv, m_nvmap_fd,
map_address, SharedBufferSize));
// Record the display id.
m_display_id = display_id;
R_TRY(AllocateHandleForBuffer(std::addressof(session.buffer_nvmap_handle), *m_nvdrv,
session.nvmap_fd, map_address, SharedBufferSize));
// Create and open a layer for the display.
m_layer_id = m_flinger.CreateLayer(m_display_id).value();
m_flinger.OpenLayer(m_layer_id);
// Set up the buffer.
m_buffer_id = m_next_buffer_id++;
session.layer_id = m_flinger.CreateLayer(m_display_id, blending).value();
m_flinger.OpenLayer(session.layer_id);
// Get the layer.
VI::Layer* layer = m_flinger.FindLayer(m_display_id, m_layer_id);
VI::Layer* layer = m_flinger.FindLayer(m_display_id, session.layer_id);
ASSERT(layer != nullptr);
// Get the producer and set preallocated buffers.
auto& producer = layer->GetBufferQueue();
MakeGraphicBuffer(producer, 0, m_buffer_nvmap_handle);
MakeGraphicBuffer(producer, 1, m_buffer_nvmap_handle);
MakeGraphicBuffer(producer, 0, session.buffer_nvmap_handle);
MakeGraphicBuffer(producer, 1, session.buffer_nvmap_handle);
// Assign outputs.
*out_buffer_id = m_buffer_id;
*out_layer_id = m_layer_id;
*out_layer_handle = session.layer_id;
// We succeeded.
R_SUCCEED();
}
void FbShareBufferManager::Finalize(Kernel::KProcess* owner_process) {
std::scoped_lock lk{m_guard};
if (m_buffer_id == 0) {
return;
}
const u64 aruid = owner_process->GetProcessId();
const auto it = m_sessions.find(aruid);
if (it == m_sessions.end()) {
return;
}
auto& session = it->second;
// Destroy the layer.
m_flinger.DestroyLayer(session.layer_id);
// Close nvmap handle.
FreeHandle(session.buffer_nvmap_handle, *m_nvdrv, session.nvmap_fd);
// Close nvmap device.
m_nvdrv->Close(session.nvmap_fd);
// Close session.
auto& container = m_nvdrv->GetContainer();
container.CloseSession(session.session_id);
// Erase.
m_sessions.erase(it);
}
Result FbShareBufferManager::GetSharedBufferMemoryHandleId(u64* out_buffer_size,
s32* out_nvmap_handle,
SharedMemoryPoolLayout* out_pool_layout,
@@ -242,17 +310,18 @@ Result FbShareBufferManager::GetSharedBufferMemoryHandleId(u64* out_buffer_size,
R_UNLESS(m_buffer_id > 0, VI::ResultNotFound);
R_UNLESS(buffer_id == m_buffer_id, VI::ResultNotFound);
R_UNLESS(m_sessions.contains(applet_resource_user_id), VI::ResultNotFound);
*out_pool_layout = SharedBufferPoolLayout;
*out_buffer_size = SharedBufferSize;
*out_nvmap_handle = m_buffer_nvmap_handle;
*out_nvmap_handle = m_sessions[applet_resource_user_id].buffer_nvmap_handle;
R_SUCCEED();
}
Result FbShareBufferManager::GetLayerFromId(VI::Layer** out_layer, u64 layer_id) {
// Ensure the layer id is valid.
R_UNLESS(m_layer_id > 0 && layer_id == m_layer_id, VI::ResultNotFound);
R_UNLESS(layer_id > 0, VI::ResultNotFound);
// Get the layer.
VI::Layer* layer = m_flinger.FindLayer(m_display_id, layer_id);
@@ -309,6 +378,10 @@ Result FbShareBufferManager::PresentSharedFrameBuffer(android::Fence fence,
android::Status::NoError,
VI::ResultOperationFailed);
ON_RESULT_FAILURE {
producer.CancelBuffer(static_cast<s32>(slot), fence);
};
// Queue the buffer to the producer.
android::QueueBufferInput input{};
android::QueueBufferOutput output{};
@@ -342,4 +415,33 @@ Result FbShareBufferManager::GetSharedFrameBufferAcquirableEvent(Kernel::KReadab
R_SUCCEED();
}
Result FbShareBufferManager::WriteAppletCaptureBuffer(bool* out_was_written, s32* out_layer_index) {
std::vector<u8> capture_buffer(m_system.GPU().GetAppletCaptureBuffer());
Common::ScratchBuffer<u32> scratch;
// TODO: this could be optimized
s64 e = -1280 * 768 * 4;
for (auto& block : *m_buffer_page_group) {
u8* start = m_system.DeviceMemory().GetPointer<u8>(block.GetAddress());
u8* end = m_system.DeviceMemory().GetPointer<u8>(block.GetAddress() + block.GetSize());
for (; start < end; start++) {
*start = 0;
if (e >= 0 && e < static_cast<s64>(capture_buffer.size())) {
*start = capture_buffer[e];
}
e++;
}
m_system.GPU().Host1x().MemoryManager().ApplyOpOnPointer(start, scratch, [&](DAddr addr) {
m_system.GPU().InvalidateRegion(addr, end - start);
});
}
*out_was_written = true;
*out_layer_index = 1;
R_SUCCEED();
}
} // namespace Service::Nvnflinger

View File

@@ -3,9 +3,12 @@
#pragma once
#include <map>
#include "common/math_util.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/hle/service/nvnflinger/hwc_layer.h"
#include "core/hle/service/nvnflinger/nvnflinger.h"
#include "core/hle/service/nvnflinger/ui/fence.h"
@@ -29,13 +32,18 @@ struct SharedMemoryPoolLayout {
};
static_assert(sizeof(SharedMemoryPoolLayout) == 0x188, "SharedMemoryPoolLayout has wrong size");
struct FbShareSession;
class FbShareBufferManager final {
public:
explicit FbShareBufferManager(Core::System& system, Nvnflinger& flinger,
std::shared_ptr<Nvidia::Module> nvdrv);
~FbShareBufferManager();
Result Initialize(u64* out_buffer_id, u64* out_layer_handle, u64 display_id);
Result Initialize(Kernel::KProcess* owner_process, u64* out_buffer_id, u64* out_layer_handle,
u64 display_id, LayerBlending blending);
void Finalize(Kernel::KProcess* owner_process);
Result GetSharedBufferMemoryHandleId(u64* out_buffer_size, s32* out_nvmap_handle,
SharedMemoryPoolLayout* out_pool_layout, u64 buffer_id,
u64 applet_resource_user_id);
@@ -45,6 +53,8 @@ public:
u32 transform, s32 swap_interval, u64 layer_id, s64 slot);
Result GetSharedFrameBufferAcquirableEvent(Kernel::KReadableEvent** out_event, u64 layer_id);
Result WriteAppletCaptureBuffer(bool* out_was_written, s32* out_layer_index);
private:
Result GetLayerFromId(VI::Layer** out_layer, u64 layer_id);
@@ -52,11 +62,8 @@ private:
u64 m_next_buffer_id = 1;
u64 m_display_id = 0;
u64 m_buffer_id = 0;
u64 m_layer_id = 0;
u32 m_buffer_nvmap_handle = 0;
SharedMemoryPoolLayout m_pool_layout = {};
Nvidia::DeviceFD m_nvmap_fd = {};
Nvidia::NvCore::SessionId m_session_id = {};
std::map<u64, FbShareSession> m_sessions;
std::unique_ptr<Kernel::KPageGroup> m_buffer_page_group;
std::mutex m_guard;
@@ -65,4 +72,11 @@ private:
std::shared_ptr<Nvidia::Module> m_nvdrv;
};
struct FbShareSession {
Nvidia::DeviceFD nvmap_fd = {};
Nvidia::NvCore::SessionId session_id = {};
u64 layer_id = {};
u32 buffer_nvmap_handle = 0;
};
} // namespace Service::Nvnflinger

View File

@@ -109,6 +109,7 @@ u32 HardwareComposer::ComposeLocked(f32* out_speed_scale, VI::Display& display,
.height = igbp_buffer.Height(),
.stride = igbp_buffer.Stride(),
.z_index = 0,
.blending = layer.GetBlending(),
.transform = static_cast<android::BufferTransformFlags>(item.transform),
.crop_rect = item.crop,
.acquire_fence = item.fence,

View File

@@ -11,6 +11,18 @@
namespace Service::Nvnflinger {
// hwc_layer_t::blending values
enum class LayerBlending : u32 {
// No blending
None = 0x100,
// ONE / ONE_MINUS_SRC_ALPHA
Premultiplied = 0x105,
// SRC_ALPHA / ONE_MINUS_SRC_ALPHA
Coverage = 0x405,
};
struct HwcLayer {
u32 buffer_handle;
u32 offset;
@@ -19,6 +31,7 @@ struct HwcLayer {
u32 height;
u32 stride;
s32 z_index;
LayerBlending blending;
android::BufferTransformFlags transform;
Common::Rectangle<int> crop_rect;
android::Fence acquire_fence;

View File

@@ -157,7 +157,7 @@ bool Nvnflinger::CloseDisplay(u64 display_id) {
return true;
}
std::optional<u64> Nvnflinger::CreateLayer(u64 display_id) {
std::optional<u64> Nvnflinger::CreateLayer(u64 display_id, LayerBlending blending) {
const auto lock_guard = Lock();
auto* const display = FindDisplay(display_id);
@@ -166,13 +166,14 @@ std::optional<u64> Nvnflinger::CreateLayer(u64 display_id) {
}
const u64 layer_id = next_layer_id++;
CreateLayerAtId(*display, layer_id);
CreateLayerAtId(*display, layer_id, blending);
return layer_id;
}
void Nvnflinger::CreateLayerAtId(VI::Display& display, u64 layer_id) {
void Nvnflinger::CreateLayerAtId(VI::Display& display, u64 layer_id, LayerBlending blending) {
const auto buffer_id = next_buffer_queue_id++;
display.CreateLayer(layer_id, buffer_id, nvdrv->container);
display.FindLayer(layer_id)->SetBlending(blending);
}
bool Nvnflinger::OpenLayer(u64 layer_id) {

View File

@@ -15,6 +15,7 @@
#include "common/thread.h"
#include "core/hle/result.h"
#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/nvnflinger/hwc_layer.h"
namespace Common {
class Event;
@@ -72,7 +73,8 @@ public:
/// Creates a layer on the specified display and returns the layer ID.
///
/// If an invalid display ID is specified, then an empty optional is returned.
[[nodiscard]] std::optional<u64> CreateLayer(u64 display_id);
[[nodiscard]] std::optional<u64> CreateLayer(u64 display_id,
LayerBlending blending = LayerBlending::None);
/// Opens a layer on all displays for the given layer ID.
bool OpenLayer(u64 layer_id);
@@ -128,7 +130,7 @@ private:
[[nodiscard]] VI::Layer* FindLayer(u64 display_id, u64 layer_id);
/// Creates a layer with the specified layer ID in the desired display.
void CreateLayerAtId(VI::Display& display, u64 layer_id);
void CreateLayerAtId(VI::Display& display, u64 layer_id, LayerBlending blending);
void SplitVSync(std::stop_token stop_token);

View File

@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "core/hle/service/nvnflinger/hwc_layer.h"
#include "core/hle/service/vi/layer/vi_layer.h"
namespace Service::VI {
@@ -8,8 +9,9 @@ namespace Service::VI {
Layer::Layer(u64 layer_id_, u32 binder_id_, android::BufferQueueCore& core_,
android::BufferQueueProducer& binder_,
std::shared_ptr<android::BufferItemConsumer>&& consumer_)
: layer_id{layer_id_}, binder_id{binder_id_}, core{core_}, binder{binder_},
consumer{std::move(consumer_)}, open{false}, visible{true} {}
: layer_id{layer_id_}, binder_id{binder_id_}, core{core_}, binder{binder_}, consumer{std::move(
consumer_)},
blending{Nvnflinger::LayerBlending::None}, open{false}, visible{true} {}
Layer::~Layer() = default;

View File

@@ -14,6 +14,10 @@ class BufferQueueCore;
class BufferQueueProducer;
} // namespace Service::android
namespace Service::Nvnflinger {
enum class LayerBlending : u32;
}
namespace Service::VI {
/// Represents a single display layer.
@@ -92,12 +96,21 @@ public:
return !std::exchange(open, true);
}
Nvnflinger::LayerBlending GetBlending() {
return blending;
}
void SetBlending(Nvnflinger::LayerBlending b) {
blending = b;
}
private:
const u64 layer_id;
const u32 binder_id;
android::BufferQueueCore& core;
android::BufferQueueProducer& binder;
std::shared_ptr<android::BufferItemConsumer> consumer;
Service::Nvnflinger::LayerBlending blending;
bool open;
bool visible;
};

View File

@@ -64,6 +64,8 @@ public:
Memory(Memory&&) = default;
Memory& operator=(Memory&&) = delete;
static constexpr bool HAS_FLUSH_INVALIDATION = false;
/**
* Resets the state of the Memory system.
*/

View File

@@ -401,6 +401,14 @@ void Config::ReadNetworkValues() {
EndGroup();
}
void Config::ReadLibraryAppletValues() {
BeginGroup(Settings::TranslateCategory(Settings::Category::LibraryApplet));
ReadCategory(Settings::Category::LibraryApplet);
EndGroup();
}
void Config::ReadValues() {
if (global) {
ReadDataStorageValues();
@@ -410,6 +418,7 @@ void Config::ReadValues() {
ReadServiceValues();
ReadWebServiceValues();
ReadMiscellaneousValues();
ReadLibraryAppletValues();
}
ReadControlValues();
ReadCoreValues();
@@ -511,6 +520,7 @@ void Config::SaveValues() {
SaveNetworkValues();
SaveWebServiceValues();
SaveMiscellaneousValues();
SaveLibraryAppletValues();
} else {
LOG_DEBUG(Config, "Saving only generic configuration values");
}
@@ -691,6 +701,14 @@ void Config::SaveWebServiceValues() {
EndGroup();
}
void Config::SaveLibraryAppletValues() {
BeginGroup(Settings::TranslateCategory(Settings::Category::LibraryApplet));
WriteCategory(Settings::Category::LibraryApplet);
EndGroup();
}
bool Config::ReadBooleanSetting(const std::string& key, const std::optional<bool> default_value) {
std::string full_key = GetFullKey(key, false);
if (!default_value.has_value()) {

View File

@@ -88,6 +88,7 @@ protected:
void ReadSystemValues();
void ReadWebServiceValues();
void ReadNetworkValues();
void ReadLibraryAppletValues();
// Read platform specific sections
virtual void ReadHidbusValues() = 0;
@@ -121,6 +122,7 @@ protected:
void SaveScreenshotValues();
void SaveSystemValues();
void SaveWebServiceValues();
void SaveLibraryAppletValues();
// Save platform specific sections
virtual void SaveHidbusValues() = 0;

View File

@@ -52,9 +52,42 @@ ResourceManager::ResourceManager(Core::System& system_,
std::shared_ptr<HidFirmwareSettings> settings)
: firmware_settings{settings}, system{system_}, service_context{system_, "hid"} {
applet_resource = std::make_shared<AppletResource>(system);
// Register update callbacks
npad_update_event = Core::Timing::CreateEvent("HID::UpdatePadCallback",
[this](s64 time, std::chrono::nanoseconds ns_late)
-> std::optional<std::chrono::nanoseconds> {
UpdateNpad(ns_late);
return std::nullopt;
});
default_update_event = Core::Timing::CreateEvent(
"HID::UpdateDefaultCallback",
[this](s64 time,
std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> {
UpdateControllers(ns_late);
return std::nullopt;
});
mouse_keyboard_update_event = Core::Timing::CreateEvent(
"HID::UpdateMouseKeyboardCallback",
[this](s64 time,
std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> {
UpdateMouseKeyboard(ns_late);
return std::nullopt;
});
motion_update_event = Core::Timing::CreateEvent(
"HID::UpdateMotionCallback",
[this](s64 time,
std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> {
UpdateMotion(ns_late);
return std::nullopt;
});
}
ResourceManager::~ResourceManager() {
system.CoreTiming().UnscheduleEvent(npad_update_event);
system.CoreTiming().UnscheduleEvent(default_update_event);
system.CoreTiming().UnscheduleEvent(mouse_keyboard_update_event);
system.CoreTiming().UnscheduleEvent(motion_update_event);
system.CoreTiming().UnscheduleEvent(touch_update_event);
input_event->Finalize();
};
@@ -201,6 +234,7 @@ void ResourceManager::InitializeHidCommonSampler() {
debug_pad->SetAppletResource(applet_resource, &shared_mutex);
digitizer->SetAppletResource(applet_resource, &shared_mutex);
unique_pad->SetAppletResource(applet_resource, &shared_mutex);
keyboard->SetAppletResource(applet_resource, &shared_mutex);
const auto settings =
@@ -214,6 +248,14 @@ void ResourceManager::InitializeHidCommonSampler() {
home_button->SetAppletResource(applet_resource, &shared_mutex);
sleep_button->SetAppletResource(applet_resource, &shared_mutex);
capture_button->SetAppletResource(applet_resource, &shared_mutex);
system.CoreTiming().ScheduleLoopingEvent(npad_update_ns, npad_update_ns, npad_update_event);
system.CoreTiming().ScheduleLoopingEvent(default_update_ns, default_update_ns,
default_update_event);
system.CoreTiming().ScheduleLoopingEvent(mouse_keyboard_update_ns, mouse_keyboard_update_ns,
mouse_keyboard_update_event);
system.CoreTiming().ScheduleLoopingEvent(motion_update_ns, motion_update_ns,
motion_update_event);
}
void ResourceManager::InitializeTouchScreenSampler() {
@@ -465,55 +507,9 @@ IAppletResource::IAppletResource(Core::System& system_, std::shared_ptr<Resource
{0, &IAppletResource::GetSharedMemoryHandle, "GetSharedMemoryHandle"},
};
RegisterHandlers(functions);
// Register update callbacks
npad_update_event = Core::Timing::CreateEvent(
"HID::UpdatePadCallback",
[this, resource](
s64 time, std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> {
const auto guard = LockService();
resource->UpdateNpad(ns_late);
return std::nullopt;
});
default_update_event = Core::Timing::CreateEvent(
"HID::UpdateDefaultCallback",
[this, resource](
s64 time, std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> {
const auto guard = LockService();
resource->UpdateControllers(ns_late);
return std::nullopt;
});
mouse_keyboard_update_event = Core::Timing::CreateEvent(
"HID::UpdateMouseKeyboardCallback",
[this, resource](
s64 time, std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> {
const auto guard = LockService();
resource->UpdateMouseKeyboard(ns_late);
return std::nullopt;
});
motion_update_event = Core::Timing::CreateEvent(
"HID::UpdateMotionCallback",
[this, resource](
s64 time, std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> {
const auto guard = LockService();
resource->UpdateMotion(ns_late);
return std::nullopt;
});
system.CoreTiming().ScheduleLoopingEvent(npad_update_ns, npad_update_ns, npad_update_event);
system.CoreTiming().ScheduleLoopingEvent(default_update_ns, default_update_ns,
default_update_event);
system.CoreTiming().ScheduleLoopingEvent(mouse_keyboard_update_ns, mouse_keyboard_update_ns,
mouse_keyboard_update_event);
system.CoreTiming().ScheduleLoopingEvent(motion_update_ns, motion_update_ns,
motion_update_event);
}
IAppletResource::~IAppletResource() {
system.CoreTiming().UnscheduleEvent(npad_update_event);
system.CoreTiming().UnscheduleEvent(default_update_event);
system.CoreTiming().UnscheduleEvent(mouse_keyboard_update_event);
system.CoreTiming().UnscheduleEvent(motion_update_event);
resource_manager->FreeAppletResourceId(aruid);
}
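The hunks above move the periodic HID update callbacks out of IAppletResource and into ResourceManager: each event is created once with Core::Timing::CreateEvent, scheduled as a looping CoreTiming event, and unscheduled in the destructor. As a rough standalone analogue of that register-then-loop lifetime (plain C++20 with std::jthread; this is illustrative only and not yuzu's actual Core::Timing API, which runs on the emulated clock):

```cpp
// Minimal sketch of "create a callback, schedule it as a looping event, stop it on destruction".
// Standalone analogue for illustration; not yuzu's Core::Timing.
#include <chrono>
#include <functional>
#include <thread>
#include <vector>

class LoopingScheduler {
public:
    using Callback = std::function<void(std::chrono::nanoseconds lateness)>;

    // Fires `callback` every `interval` until the scheduler is destroyed,
    // passing how late the invocation was relative to its deadline.
    void ScheduleLooping(std::chrono::nanoseconds interval, Callback callback) {
        workers.emplace_back([interval, cb = std::move(callback)](std::stop_token stop) {
            auto deadline = std::chrono::steady_clock::now() + interval;
            while (!stop.stop_requested()) {
                std::this_thread::sleep_until(deadline);
                cb(std::chrono::steady_clock::now() - deadline);
                deadline += interval;
            }
        });
    }

private:
    std::vector<std::jthread> workers; // request_stop() and join() on destruction
};
```

ResourceManager's destructor unscheduling each event plays the role of the jthread members being stopped and joined here.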


@@ -147,6 +147,10 @@ private:
std::shared_ptr<SixAxis> six_axis{nullptr};
std::shared_ptr<SleepButton> sleep_button{nullptr};
std::shared_ptr<UniquePad> unique_pad{nullptr};
std::shared_ptr<Core::Timing::EventType> npad_update_event;
std::shared_ptr<Core::Timing::EventType> default_update_event;
std::shared_ptr<Core::Timing::EventType> mouse_keyboard_update_event;
std::shared_ptr<Core::Timing::EventType> motion_update_event;
// TODO: Create these resources
// std::shared_ptr<AudioControl> audio_control{nullptr};
@@ -179,11 +183,6 @@ public:
private:
void GetSharedMemoryHandle(HLERequestContext& ctx);
std::shared_ptr<Core::Timing::EventType> npad_update_event{nullptr};
std::shared_ptr<Core::Timing::EventType> default_update_event{nullptr};
std::shared_ptr<Core::Timing::EventType> mouse_keyboard_update_event{nullptr};
std::shared_ptr<Core::Timing::EventType> motion_update_event{nullptr};
u64 aruid{};
std::shared_ptr<ResourceManager> resource_manager;
};


@@ -17,10 +17,6 @@ void Digitizer::OnInit() {}
void Digitizer::OnRelease() {}
void Digitizer::OnUpdate(const Core::Timing::CoreTiming& core_timing) {
if (!smart_update) {
return;
}
std::scoped_lock shared_lock{*shared_mutex};
const u64 aruid = applet_resource->GetActiveAruid();
auto* data = applet_resource->GetAruidData(aruid);


@@ -20,8 +20,5 @@ public:
// When the controller is requesting an update for the shared memory
void OnUpdate(const Core::Timing::CoreTiming& core_timing) override;
private:
bool smart_update{};
};
} // namespace Service::HID


@@ -102,6 +102,8 @@ Result NPad::Activate(u64 aruid) {
for (std::size_t i = 0; i < 19; ++i) {
WriteEmptyEntry(npad);
}
controller.is_active = true;
}
return ResultSuccess;
@@ -467,6 +469,13 @@ void NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing) {
continue;
}
bool is_set{};
npad_resource.IsSupportedNpadStyleSet(is_set, aruid);
// Wait until style is defined
if (!is_set) {
continue;
}
for (std::size_t i = 0; i < controller_data[aruid_index].size(); ++i) {
auto& controller = controller_data[aruid_index][i];
controller.shared_memory =
@@ -484,6 +493,10 @@ void NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing) {
continue;
}
if (!controller.is_active) {
continue;
}
RequestPadStateUpdate(aruid, controller.device->GetNpadIdType());
auto& pad_state = controller.npad_pad_state;
auto& libnx_state = controller.npad_libnx_state;
@@ -592,7 +605,9 @@ void NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing) {
libnx_state.npad_buttons.raw = pad_state.npad_buttons.raw;
libnx_state.l_stick = pad_state.l_stick;
libnx_state.r_stick = pad_state.r_stick;
npad->system_ext_lifo.WriteNextEntry(pad_state);
libnx_state.sampling_number =
npad->system_ext_lifo.ReadCurrentEntry().state.sampling_number + 1;
npad->system_ext_lifo.WriteNextEntry(libnx_state);
press_state |= static_cast<u64>(pad_state.npad_buttons.raw);
}
@@ -1060,6 +1075,7 @@ void NPad::UnregisterAppletResourceUserId(u64 aruid) {
// TODO: Remove this once abstract pad is emulated properly
const auto aruid_index = npad_resource.GetIndexFromAruid(aruid);
for (auto& controller : controller_data[aruid_index]) {
controller.is_active = false;
controller.is_connected = false;
controller.shared_memory = nullptr;
}


@@ -164,6 +164,7 @@ private:
NpadInternalState* shared_memory = nullptr;
Core::HID::EmulatedController* device = nullptr;
bool is_active{};
bool is_connected{};
// Dual joycons can have only one side connected


@@ -17,10 +17,6 @@ void UniquePad::OnInit() {}
void UniquePad::OnRelease() {}
void UniquePad::OnUpdate(const Core::Timing::CoreTiming& core_timing) {
if (!smart_update) {
return;
}
const u64 aruid = applet_resource->GetActiveAruid();
auto* data = applet_resource->GetAruidData(aruid);


@@ -20,8 +20,5 @@ public:
// When the controller is requesting an update for the shared memory
void OnUpdate(const Core::Timing::CoreTiming& core_timing) override;
private:
bool smart_update{};
};
} // namespace Service::HID


@@ -60,7 +60,8 @@ public:
Add(spv::ImageOperandsMask::ConstOffsets, offsets);
}
explicit ImageOperands(Id lod, Id ms) {
explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, Id lod, Id ms) {
AddOffset(ctx, offset, ImageFetchOffsetAllowed);
if (Sirit::ValidId(lod)) {
Add(spv::ImageOperandsMask::Lod, lod);
}
@@ -311,37 +312,6 @@ Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info,
return coords;
}
}
void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, Id& coords,
Id offset) {
if (!Sirit::ValidId(offset)) {
return;
}
Id result_type{};
switch (info.type) {
case TextureType::Buffer:
case TextureType::Color1D:
case TextureType::ColorArray1D: {
result_type = ctx.U32[1];
break;
}
case TextureType::Color2D:
case TextureType::Color2DRect:
case TextureType::ColorArray2D: {
result_type = ctx.U32[2];
break;
}
case TextureType::Color3D: {
result_type = ctx.U32[3];
break;
}
case TextureType::ColorCube:
case TextureType::ColorArrayCube:
return;
}
coords = ctx.OpIAdd(result_type, coords, offset);
}
} // Anonymous namespace
Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
@@ -524,10 +494,9 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
operands.Span());
}
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id lod, Id ms) {
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, Id lod, Id ms) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
AddOffsetToCoordinates(ctx, info, coords, offset);
if (info.type == TextureType::Buffer) {
lod = Id{};
}
@@ -535,7 +504,7 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
// This image is multisampled, lod must be implicit
lod = Id{};
}
const ImageOperands operands(lod, ms);
const ImageOperands operands(ctx, offset, lod, ms);
return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
}


@@ -537,8 +537,8 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
const IR::Value& offset, const IR::Value& offset2);
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2, Id dref);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id lod, Id ms);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, Id lod, Id ms);
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
const IR::Value& skip_mips);
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);


@@ -18,6 +18,7 @@ add_library(video_core STATIC
buffer_cache/usage_tracker.h
buffer_cache/word_manager.h
cache_types.h
capture.h
cdma_pusher.cpp
cdma_pusher.h
compatible_formats.cpp
@@ -59,8 +60,8 @@ add_library(video_core STATIC
framebuffer_config.h
fsr.cpp
fsr.h
host1x/codecs/codec.cpp
host1x/codecs/codec.h
host1x/codecs/decoder.cpp
host1x/codecs/decoder.h
host1x/codecs/h264.cpp
host1x/codecs/h264.h
host1x/codecs/vp8.cpp
@@ -79,8 +80,6 @@ add_library(video_core STATIC
host1x/nvdec.cpp
host1x/nvdec.h
host1x/nvdec_common.h
host1x/sync_manager.cpp
host1x/sync_manager.h
host1x/syncpoint_manager.cpp
host1x/syncpoint_manager.h
host1x/vic.cpp
@@ -101,6 +100,7 @@ add_library(video_core STATIC
memory_manager.cpp
memory_manager.h
precompiled_headers.h
present.h
pte_kind.h
query_cache/bank_base.h
query_cache/query_base.h


@@ -1546,7 +1546,10 @@ void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
std::span<const u8> upload_span;
const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
if (IsRangeGranular(device_addr, copy.size)) {
upload_span = std::span(device_memory.GetPointer<u8>(device_addr), copy.size);
auto* const ptr = device_memory.GetPointer<u8>(device_addr);
if (ptr != nullptr) {
upload_span = std::span(ptr, copy.size);
}
} else {
if (immediate_buffer.empty()) {
immediate_buffer = ImmediateBuffer(largest_copy);

src/video_core/capture.h (new file, 36 lines)

@@ -0,0 +1,36 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/alignment.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "core/frontend/framebuffer_layout.h"
#include "video_core/surface.h"
namespace VideoCore::Capture {
constexpr u32 BlockHeight = 4;
constexpr u32 BlockDepth = 0;
constexpr u32 BppLog2 = 2;
constexpr auto PixelFormat = Surface::PixelFormat::B8G8R8A8_UNORM;
constexpr auto LinearWidth = Layout::ScreenUndocked::Width;
constexpr auto LinearHeight = Layout::ScreenUndocked::Height;
constexpr auto LinearDepth = 1U;
constexpr auto BytesPerPixel = 4U;
constexpr auto TiledWidth = LinearWidth;
constexpr auto TiledHeight = Common::AlignUpLog2(LinearHeight, BlockHeight + BlockDepth + BppLog2);
constexpr auto TiledSize = TiledWidth * TiledHeight * (1 << BppLog2);
constexpr Layout::FramebufferLayout Layout{
.width = LinearWidth,
.height = LinearHeight,
.screen = {0, 0, LinearWidth, LinearHeight},
.is_srgb = false,
};
} // namespace VideoCore::Capture
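With the constants above, and assuming the undocked layout is 1280×720, the capture surface is a 4-byte-per-pixel B8G8R8A8 image whose tiled height is rounded up to a multiple of 2^(BlockHeight + BlockDepth + BppLog2) = 64 lines: 768 rows, for a 1280 × 768 × 4 = 3,932,160-byte tiled buffer. A small standalone re-derivation (the AlignUpLog2 here mirrors what Common::AlignUpLog2 is assumed to do):

```cpp
#include <cstdint>

// Standalone re-derivation of the tiled capture buffer size.
// Assumes the undocked screen is 1280x720 and that AlignUpLog2 rounds up to a power-of-two multiple.
constexpr std::uint32_t AlignUpLog2(std::uint32_t value, std::uint32_t align_log2) {
    const std::uint32_t mask = (1u << align_log2) - 1u;
    return (value + mask) & ~mask;
}

constexpr std::uint32_t LinearWidth = 1280;  // Layout::ScreenUndocked::Width (assumed)
constexpr std::uint32_t LinearHeight = 720;  // Layout::ScreenUndocked::Height (assumed)
constexpr std::uint32_t BlockHeight = 4, BlockDepth = 0, BppLog2 = 2;

constexpr std::uint32_t TiledHeight = AlignUpLog2(LinearHeight, BlockHeight + BlockDepth + BppLog2);
constexpr std::uint32_t TiledSize = LinearWidth * TiledHeight * (1u << BppLog2);

static_assert(TiledHeight == 768);
static_assert(TiledSize == 3'932'160);
```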


@@ -2,136 +2,130 @@
// SPDX-License-Identifier: MIT
#include <bit>
#include "common/thread.h"
#include "core/core.h"
#include "video_core/cdma_pusher.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/host1x/control.h"
#include "video_core/host1x/host1x.h"
#include "video_core/host1x/nvdec.h"
#include "video_core/host1x/nvdec_common.h"
#include "video_core/host1x/sync_manager.h"
#include "video_core/host1x/vic.h"
#include "video_core/memory_manager.h"
namespace Tegra {
CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_)
: host1x{host1x_}, nvdec_processor(std::make_shared<Host1x::Nvdec>(host1x)),
vic_processor(std::make_unique<Host1x::Vic>(host1x, nvdec_processor)),
host1x_processor(std::make_unique<Host1x::Control>(host1x)),
sync_manager(std::make_unique<Host1x::SyncptIncrManager>(host1x)) {}
CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_, s32 id)
: host1x{host1x_}, memory_manager{host1x.GMMU()},
host_processor{std::make_unique<Host1x::Control>(host1x_)}, current_class{
static_cast<ChClassId>(id)} {
thread = std::jthread([this](std::stop_token stop_token) { ProcessEntries(stop_token); });
}
CDmaPusher::~CDmaPusher() = default;
void CDmaPusher::ProcessEntries(ChCommandHeaderList&& entries) {
for (const auto& value : entries) {
if (mask != 0) {
const auto lbs = static_cast<u32>(std::countr_zero(mask));
mask &= ~(1U << lbs);
ExecuteCommand(offset + lbs, value.raw);
continue;
} else if (count != 0) {
--count;
ExecuteCommand(offset, value.raw);
if (incrementing) {
++offset;
void CDmaPusher::ProcessEntries(std::stop_token stop_token) {
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
ChCommandHeaderList command_list{host1x.System().ApplicationMemory(), 0, 0};
u32 count{};
u32 method_offset{};
u32 mask{};
bool incrementing{};
while (!stop_token.stop_requested()) {
{
std::unique_lock l{command_mutex};
Common::CondvarWait(command_cv, l, stop_token,
[this]() { return command_lists.size() > 0; });
if (stop_token.stop_requested()) {
return;
}
continue;
command_list = std::move(command_lists.front());
command_lists.pop_front();
}
const auto mode = value.submission_mode.Value();
switch (mode) {
case ChSubmissionMode::SetClass: {
mask = value.value & 0x3f;
offset = value.method_offset;
current_class = static_cast<ChClassId>((value.value >> 6) & 0x3ff);
break;
}
case ChSubmissionMode::Incrementing:
case ChSubmissionMode::NonIncrementing:
count = value.value;
offset = value.method_offset;
incrementing = mode == ChSubmissionMode::Incrementing;
break;
case ChSubmissionMode::Mask:
mask = value.value;
offset = value.method_offset;
break;
case ChSubmissionMode::Immediate: {
const u32 data = value.value & 0xfff;
offset = value.method_offset;
ExecuteCommand(offset, data);
break;
}
default:
UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", static_cast<u32>(mode));
break;
size_t i = 0;
for (const auto value : command_list) {
i++;
if (mask != 0) {
const auto lbs = static_cast<u32>(std::countr_zero(mask));
mask &= ~(1U << lbs);
ExecuteCommand(method_offset + lbs, value.raw);
continue;
} else if (count != 0) {
--count;
ExecuteCommand(method_offset, value.raw);
if (incrementing) {
++method_offset;
}
continue;
}
const auto mode = value.submission_mode.Value();
switch (mode) {
case ChSubmissionMode::SetClass: {
mask = value.value & 0x3f;
method_offset = value.method_offset;
current_class = static_cast<ChClassId>((value.value >> 6) & 0x3ff);
break;
}
case ChSubmissionMode::Incrementing:
case ChSubmissionMode::NonIncrementing:
count = value.value;
method_offset = value.method_offset;
incrementing = mode == ChSubmissionMode::Incrementing;
break;
case ChSubmissionMode::Mask:
mask = value.value;
method_offset = value.method_offset;
break;
case ChSubmissionMode::Immediate: {
const u32 data = value.value & 0xfff;
method_offset = value.method_offset;
ExecuteCommand(method_offset, data);
break;
}
default:
LOG_ERROR(HW_GPU, "Bad command at index {} (bytes 0x{:X}), buffer size {}", i - 1,
(i - 1) * sizeof(u32), command_list.size());
UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!",
static_cast<u32>(mode));
break;
}
}
}
}
void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
void CDmaPusher::ExecuteCommand(u32 method, u32 arg) {
switch (current_class) {
case ChClassId::NvDec:
ThiStateWrite(nvdec_thi_state, offset, data);
switch (static_cast<ThiMethod>(offset)) {
case ThiMethod::IncSyncpt: {
LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method");
const auto syncpoint_id = static_cast<u32>(data & 0xFF);
const auto cond = static_cast<u32>((data >> 8) & 0xFF);
if (cond == 0) {
sync_manager->Increment(syncpoint_id);
} else {
sync_manager->SignalDone(
sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
}
break;
}
case ThiMethod::SetMethod1:
LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
static_cast<u32>(nvdec_thi_state.method_0));
nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data);
break;
default:
break;
}
break;
case ChClassId::GraphicsVic:
ThiStateWrite(vic_thi_state, static_cast<u32>(state_offset), {data});
switch (static_cast<ThiMethod>(state_offset)) {
case ThiMethod::IncSyncpt: {
LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method");
const auto syncpoint_id = static_cast<u32>(data & 0xFF);
const auto cond = static_cast<u32>((data >> 8) & 0xFF);
if (cond == 0) {
sync_manager->Increment(syncpoint_id);
} else {
sync_manager->SignalDone(
sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
}
break;
}
case ThiMethod::SetMethod1:
LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
static_cast<u32>(vic_thi_state.method_0), data);
vic_processor->ProcessMethod(static_cast<Host1x::Vic::Method>(vic_thi_state.method_0),
data);
break;
default:
break;
}
break;
case ChClassId::Control:
// This device is mainly for syncpoint synchronization
LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
host1x_processor->ProcessMethod(static_cast<Host1x::Control::Method>(offset), data);
LOG_TRACE(Service_NVDRV, "Class {} method 0x{:X} arg 0x{:X}",
static_cast<u32>(current_class), method, arg);
host_processor->ProcessMethod(static_cast<Host1x::Control::Method>(method), arg);
break;
default:
UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
break;
thi_regs.reg_array[method] = arg;
switch (static_cast<ThiMethod>(method)) {
case ThiMethod::IncSyncpt: {
const auto syncpoint_id = static_cast<u32>(arg & 0xFF);
[[maybe_unused]] const auto cond = static_cast<u32>((arg >> 8) & 0xFF);
LOG_TRACE(Service_NVDRV, "Class {} IncSyncpt Method, syncpt {} cond {}",
static_cast<u32>(current_class), syncpoint_id, cond);
auto& syncpoint_manager = host1x.GetSyncpointManager();
syncpoint_manager.IncrementGuest(syncpoint_id);
syncpoint_manager.IncrementHost(syncpoint_id);
break;
}
case ThiMethod::SetMethod1:
LOG_TRACE(Service_NVDRV, "Class {} method 0x{:X} arg 0x{:X}",
static_cast<u32>(current_class), static_cast<u32>(thi_regs.method_0), arg);
ProcessMethod(thi_regs.method_0, arg);
break;
default:
break;
}
}
}
void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 state_offset, u32 argument) {
u8* const offset_ptr = reinterpret_cast<u8*>(&state) + sizeof(u32) * state_offset;
std::memcpy(offset_ptr, &argument, sizeof(u32));
}
} // namespace Tegra


@@ -3,12 +3,18 @@
#pragma once
#include <condition_variable>
#include <deque>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/polyfill_thread.h"
#include "core/memory.h"
namespace Tegra {
@@ -62,23 +68,31 @@ struct ChCommand {
std::vector<u32> arguments;
};
using ChCommandHeaderList = std::vector<ChCommandHeader>;
using ChCommandHeaderList =
Core::Memory::CpuGuestMemory<Tegra::ChCommandHeader, Core::Memory::GuestMemoryFlags::SafeRead>;
struct ThiRegisters {
u32_le increment_syncpt{};
INSERT_PADDING_WORDS(1);
u32_le increment_syncpt_error{};
u32_le ctx_switch_incremement_syncpt{};
INSERT_PADDING_WORDS(4);
u32_le ctx_switch{};
INSERT_PADDING_WORDS(1);
u32_le ctx_syncpt_eof{};
INSERT_PADDING_WORDS(5);
u32_le method_0{};
u32_le method_1{};
INSERT_PADDING_WORDS(12);
u32_le int_status{};
u32_le int_mask{};
static constexpr std::size_t NUM_REGS = 0x20;
union {
struct {
u32_le increment_syncpt;
INSERT_PADDING_WORDS_NOINIT(1);
u32_le increment_syncpt_error;
u32_le ctx_switch_incremement_syncpt;
INSERT_PADDING_WORDS_NOINIT(4);
u32_le ctx_switch;
INSERT_PADDING_WORDS_NOINIT(1);
u32_le ctx_syncpt_eof;
INSERT_PADDING_WORDS_NOINIT(5);
u32_le method_0;
u32_le method_1;
INSERT_PADDING_WORDS_NOINIT(12);
u32_le int_status;
u32_le int_mask;
};
std::array<u32, NUM_REGS> reg_array;
};
};
enum class ThiMethod : u32 {
@@ -89,32 +103,39 @@ enum class ThiMethod : u32 {
class CDmaPusher {
public:
explicit CDmaPusher(Host1x::Host1x& host1x);
~CDmaPusher();
CDmaPusher() = delete;
virtual ~CDmaPusher();
/// Process the command entry
void ProcessEntries(ChCommandHeaderList&& entries);
void PushEntries(ChCommandHeaderList&& entries) {
std::scoped_lock l{command_mutex};
command_lists.push_back(std::move(entries));
command_cv.notify_one();
}
protected:
explicit CDmaPusher(Host1x::Host1x& host1x, s32 id);
virtual void ProcessMethod(u32 method, u32 arg) = 0;
Host1x::Host1x& host1x;
Tegra::MemoryManager& memory_manager;
private:
/// Process the command entry
void ProcessEntries(std::stop_token stop_token);
/// Invoke command class devices to execute the command based on the current state
void ExecuteCommand(u32 state_offset, u32 data);
/// Write arguments value to the ThiRegisters member at the specified offset
void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument);
std::unique_ptr<Host1x::Control> host_processor;
Host1x::Host1x& host1x;
std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
std::unique_ptr<Tegra::Host1x::Vic> vic_processor;
std::unique_ptr<Tegra::Host1x::Control> host1x_processor;
std::unique_ptr<Host1x::SyncptIncrManager> sync_manager;
ChClassId current_class{};
ThiRegisters vic_thi_state{};
ThiRegisters nvdec_thi_state{};
std::mutex command_mutex;
std::condition_variable_any command_cv;
std::deque<ChCommandHeaderList> command_lists;
std::jthread thread;
u32 count{};
u32 offset{};
u32 mask{};
bool incrementing{};
ThiRegisters thi_regs{};
ChClassId current_class;
};
} // namespace Tegra
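The reworked CDmaPusher hands command lists to a dedicated worker: PushEntries locks the mutex, queues the list, and notifies, while ProcessEntries waits on a stop-token-aware condition variable and drains the deque. A stripped-down standalone sketch of that hand-off (the Entry type here is hypothetical; the real code uses ChCommandHeaderList and Common::CondvarWait):

```cpp
#include <condition_variable>
#include <deque>
#include <mutex>
#include <thread>
#include <vector>

// Minimal producer/consumer analogue of PushEntries -> ProcessEntries.
class CommandWorker {
public:
    using Entry = std::vector<unsigned>; // hypothetical stand-in for ChCommandHeaderList

    CommandWorker() {
        thread = std::jthread([this](std::stop_token stop) { Run(stop); });
    }

    void Push(Entry entries) {
        std::scoped_lock lock{mutex};
        queue.push_back(std::move(entries));
        cv.notify_one();
    }

private:
    void Run(std::stop_token stop) {
        while (!stop.stop_requested()) {
            Entry list;
            {
                std::unique_lock lock{mutex};
                // The stop_token overload wakes up and returns false when stop is requested.
                if (!cv.wait(lock, stop, [this] { return !queue.empty(); })) {
                    return;
                }
                list = std::move(queue.front());
                queue.pop_front();
            }
            Process(list);
        }
    }

    void Process(const Entry& /*list*/) {} // stand-in for the ChSubmissionMode state machine

    std::mutex mutex;
    std::condition_variable_any cv;
    std::deque<Entry> queue;
    std::jthread thread; // declared last: stopped and joined before the queue is destroyed
};
```

Declaring the std::jthread after the mutex, condition variable, and queue matters: it is stopped and joined first during destruction, which is also why the stop-token overload of wait is used.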


@@ -11,6 +11,12 @@
namespace Tegra {
enum class BlendMode {
Opaque,
Premultiplied,
Coverage,
};
/**
* Struct describing framebuffer configuration
*/
@@ -23,6 +29,7 @@ struct FramebufferConfig {
Service::android::PixelFormat pixel_format{};
Service::android::BufferTransformFlags transform_flags{};
Common::Rectangle<int> crop_rect{};
BlendMode blending{};
};
Common::Rectangle<f32> NormalizeCrop(const FramebufferConfig& framebuffer, u32 texture_width,


@@ -250,30 +250,6 @@ struct GPU::Impl {
gpu_thread.SubmitList(channel, std::move(entries));
}
/// Push GPU command buffer entries to be processed
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
if (!use_nvdec) {
return;
}
if (!cdma_pushers.contains(id)) {
cdma_pushers.insert_or_assign(id, std::make_unique<Tegra::CDmaPusher>(host1x));
}
// SubmitCommandBuffer would make the nvdec operations async, this is not currently working
// TODO(ameerj): RE proper async nvdec operation
// gpu_thread.SubmitCommandBuffer(std::move(entries));
cdma_pushers[id]->ProcessEntries(std::move(entries));
}
/// Frees the CDMAPusher instance to free up resources
void ClearCdmaInstance(u32 id) {
const auto iter = cdma_pushers.find(id);
if (iter != cdma_pushers.end()) {
cdma_pushers.erase(iter);
}
}
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(DAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
@@ -347,11 +323,21 @@ struct GPU::Impl {
WaitForSyncOperation(wait_fence);
}
std::vector<u8> GetAppletCaptureBuffer() {
std::vector<u8> out;
const auto wait_fence =
RequestSyncOperation([&] { out = renderer->GetAppletCaptureBuffer(); });
gpu_thread.TickGPU();
WaitForSyncOperation(wait_fence);
return out;
}
GPU& gpu;
Core::System& system;
Host1x::Host1x& host1x;
std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
std::unique_ptr<VideoCore::RendererBase> renderer;
VideoCore::RasterizerInterface* rasterizer = nullptr;
const bool use_nvdec;
@@ -505,6 +491,10 @@ void GPU::RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers,
impl->RequestComposite(std::move(layers), std::move(fences));
}
std::vector<u8> GPU::GetAppletCaptureBuffer() {
return impl->GetAppletCaptureBuffer();
}
u64 GPU::GetTicks() const {
return impl->GetTicks();
}
@@ -541,14 +531,6 @@ void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
impl->PushGPUEntries(channel, std::move(entries));
}
void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
impl->PushCommandBuffer(id, entries);
}
void GPU::ClearCdmaInstance(u32 id) {
impl->ClearCdmaInstance(id);
}
VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) {
return impl->OnCPURead(addr, size);
}


@@ -215,6 +215,8 @@ public:
void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers,
std::vector<Service::Nvidia::NvFence>&& fences);
std::vector<u8> GetAppletCaptureBuffer();
/// Performs any additional setup necessary in order to begin GPU emulation.
/// This can be used to launch any necessary threads and register any necessary
/// core timing events.
@@ -232,15 +234,6 @@ public:
/// Push GPU command entries to be processed
void PushGPUEntries(s32 channel, Tegra::CommandList&& entries);
/// Push GPU command buffer entries to be processed
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
/// Frees the CDMAPusher instance to free up resources
void ClearCdmaInstance(u32 id);
/// Swap buffers (render frame)
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
[[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size);


@@ -12,6 +12,7 @@
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
#include "video_core/host1x/host1x.h"
#include "video_core/renderer_base.h"
namespace VideoCommon::GPUThread {


@@ -1,113 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/settings.h"
#include "video_core/host1x/codecs/codec.h"
#include "video_core/host1x/codecs/h264.h"
#include "video_core/host1x/codecs/vp8.h"
#include "video_core/host1x/codecs/vp9.h"
#include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h"
namespace Tegra {
Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs)
: host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)),
vp8_decoder(std::make_unique<Decoder::VP8>(host1x)),
vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {}
Codec::~Codec() = default;
void Codec::Initialize() {
initialized = decode_api.Initialize(current_codec);
}
void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
if (current_codec != codec) {
current_codec = codec;
LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
}
}
void Codec::Decode() {
const bool is_first_frame = !initialized;
if (is_first_frame) {
Initialize();
}
if (!initialized) {
return;
}
// Assemble bitstream.
bool vp9_hidden_frame = false;
size_t configuration_size = 0;
const auto packet_data = [&]() {
switch (current_codec) {
case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
return h264_decoder->ComposeFrame(state, &configuration_size, is_first_frame);
case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
return vp8_decoder->ComposeFrame(state);
case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
vp9_decoder->ComposeFrame(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
return vp9_decoder->GetFrameBytes();
default:
ASSERT(false);
return std::span<const u8>{};
}
}();
// Send assembled bitstream to decoder.
if (!decode_api.SendPacket(packet_data, configuration_size)) {
return;
}
// Only receive/store visible frames.
if (vp9_hidden_frame) {
return;
}
// Receive output frames from decoder.
decode_api.ReceiveFrames(frames);
while (frames.size() > 10) {
LOG_DEBUG(HW_GPU, "ReceiveFrames overflow, dropped frame");
frames.pop();
}
}
std::unique_ptr<FFmpeg::Frame> Codec::GetCurrentFrame() {
// Sometimes VIC will request more frames than have been decoded.
// in this case, return a blank frame and don't overwrite previous data.
if (frames.empty()) {
return {};
}
auto frame = std::move(frames.front());
frames.pop();
return frame;
}
Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
return current_codec;
}
std::string_view Codec::GetCurrentCodecName() const {
switch (current_codec) {
case Host1x::NvdecCommon::VideoCodec::None:
return "None";
case Host1x::NvdecCommon::VideoCodec::H264:
return "H264";
case Host1x::NvdecCommon::VideoCodec::VP8:
return "VP8";
case Host1x::NvdecCommon::VideoCodec::H265:
return "H265";
case Host1x::NvdecCommon::VideoCodec::VP9:
return "VP9";
default:
return "Unknown";
}
}
} // namespace Tegra


@@ -1,63 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <optional>
#include <string_view>
#include <queue>
#include "common/common_types.h"
#include "video_core/host1x/ffmpeg/ffmpeg.h"
#include "video_core/host1x/nvdec_common.h"
namespace Tegra {
namespace Decoder {
class H264;
class VP8;
class VP9;
} // namespace Decoder
namespace Host1x {
class Host1x;
} // namespace Host1x
class Codec {
public:
explicit Codec(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs);
~Codec();
/// Initialize the codec, returning success or failure
void Initialize();
/// Sets NVDEC video stream codec
void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec);
/// Call decoders to construct headers, decode AVFrame with ffmpeg
void Decode();
/// Returns next decoded frame
[[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetCurrentFrame();
/// Returns the value of current_codec
[[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const;
/// Return name of the current codec
[[nodiscard]] std::string_view GetCurrentCodecName() const;
private:
bool initialized{};
Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
FFmpeg::DecodeApi decode_api;
Host1x::Host1x& host1x;
const Host1x::NvdecCommon::NvdecRegisters& state;
std::unique_ptr<Decoder::H264> h264_decoder;
std::unique_ptr<Decoder::VP8> vp8_decoder;
std::unique_ptr<Decoder::VP9> vp9_decoder;
std::queue<std::unique_ptr<FFmpeg::Frame>> frames{};
};
} // namespace Tegra


@@ -0,0 +1,69 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/settings.h"
#include "video_core/host1x/codecs/decoder.h"
#include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h"
namespace Tegra {
Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_, const Host1x::NvdecCommon::NvdecRegisters& regs_,
Host1x::FrameQueue& frame_queue_)
: host1x(host1x_), memory_manager{host1x.GMMU()}, regs{regs_}, id{id_}, frame_queue{
frame_queue_} {}
Decoder::~Decoder() = default;
void Decoder::Decode() {
if (!initialized) {
return;
}
const auto packet_data = ComposeFrame();
// Send assembled bitstream to decoder.
if (!decode_api.SendPacket(packet_data)) {
return;
}
// Only receive/store visible frames.
if (vp9_hidden_frame) {
return;
}
// Receive output frames from decoder.
auto frame = decode_api.ReceiveFrame();
if (IsInterlaced()) {
auto [luma_top, luma_bottom, chroma_top, chroma_bottom] = GetInterlacedOffsets();
auto frame_copy = frame;
if (!frame.get()) {
LOG_ERROR(HW_GPU, "Failed to decode interlaced frame for top 0x{:X} bottom 0x{:X}",
luma_top, luma_bottom);
}
if (UsingDecodeOrder()) {
frame_queue.PushDecodeOrder(id, luma_top, std::move(frame));
frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy));
} else {
frame_queue.PushPresentOrder(id, luma_top, std::move(frame));
frame_queue.PushPresentOrder(id, luma_bottom, std::move(frame_copy));
}
} else {
auto [luma_offset, chroma_offset] = GetProgressiveOffsets();
if (!frame.get()) {
LOG_ERROR(HW_GPU, "Failed to decode progressive frame for luma 0x{:X}", luma_offset);
}
if (UsingDecodeOrder()) {
frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame));
} else {
frame_queue.PushPresentOrder(id, luma_offset, std::move(frame));
}
}
}
} // namespace Tegra


@@ -0,0 +1,64 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <mutex>
#include <optional>
#include <string_view>
#include <unordered_map>
#include <queue>
#include "common/common_types.h"
#include "video_core/host1x/ffmpeg/ffmpeg.h"
#include "video_core/host1x/nvdec_common.h"
namespace Tegra {
namespace Host1x {
class Host1x;
class FrameQueue;
} // namespace Host1x
class Decoder {
public:
virtual ~Decoder();
/// Call decoders to construct headers, decode AVFrame with ffmpeg
void Decode();
bool UsingDecodeOrder() const {
return decode_api.UsingDecodeOrder();
}
/// Returns the value of current_codec
[[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const {
return codec;
}
/// Return name of the current codec
[[nodiscard]] virtual std::string_view GetCurrentCodecName() const = 0;
protected:
explicit Decoder(Host1x::Host1x& host1x, s32 id,
const Host1x::NvdecCommon::NvdecRegisters& regs,
Host1x::FrameQueue& frame_queue);
virtual std::span<const u8> ComposeFrame() = 0;
virtual std::tuple<u64, u64> GetProgressiveOffsets() = 0;
virtual std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() = 0;
virtual bool IsInterlaced() = 0;
Host1x::Host1x& host1x;
Tegra::MemoryManager& memory_manager;
const Host1x::NvdecCommon::NvdecRegisters& regs;
s32 id;
Host1x::FrameQueue& frame_queue;
Host1x::NvdecCommon::VideoCodec codec;
FFmpeg::DecodeApi decode_api;
bool initialized{};
bool vp9_hidden_frame{};
};
} // namespace Tegra


@@ -10,7 +10,7 @@
#include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h"
namespace Tegra::Decoder {
namespace Tegra::Decoders {
namespace {
// ZigZag LUTs from libavcodec.
constexpr std::array<u8, 64> zig_zag_direct{
@@ -25,23 +25,56 @@ constexpr std::array<u8, 16> zig_zag_scan{
};
} // Anonymous namespace
H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {}
H264::H264(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
Host1x::FrameQueue& frame_queue_)
: Decoder{host1x_, id_, regs_, frame_queue_} {
codec = Host1x::NvdecCommon::VideoCodec::H264;
initialized = decode_api.Initialize(codec);
}
H264::~H264() = default;
std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
size_t* out_configuration_size, bool is_first_frame) {
H264DecoderContext context;
host1x.GMMU().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
std::tuple<u64, u64> H264::GetProgressiveOffsets() {
auto pic_idx{current_context.h264_parameter_set.curr_pic_idx};
auto luma{regs.surface_luma_offsets[pic_idx].Address() +
current_context.h264_parameter_set.luma_frame_offset.Address()};
auto chroma{regs.surface_chroma_offsets[pic_idx].Address() +
current_context.h264_parameter_set.chroma_frame_offset.Address()};
return {luma, chroma};
}
const s64 frame_number = context.h264_parameter_set.frame_number.Value();
std::tuple<u64, u64, u64, u64> H264::GetInterlacedOffsets() {
auto pic_idx{current_context.h264_parameter_set.curr_pic_idx};
auto luma_top{regs.surface_luma_offsets[pic_idx].Address() +
current_context.h264_parameter_set.luma_top_offset.Address()};
auto luma_bottom{regs.surface_luma_offsets[pic_idx].Address() +
current_context.h264_parameter_set.luma_bot_offset.Address()};
auto chroma_top{regs.surface_chroma_offsets[pic_idx].Address() +
current_context.h264_parameter_set.chroma_top_offset.Address()};
auto chroma_bottom{regs.surface_chroma_offsets[pic_idx].Address() +
current_context.h264_parameter_set.chroma_bot_offset.Address()};
return {luma_top, luma_bottom, chroma_top, chroma_bottom};
}
bool H264::IsInterlaced() {
return current_context.h264_parameter_set.luma_top_offset.Address() != 0 ||
current_context.h264_parameter_set.luma_bot_offset.Address() != 0;
}
std::span<const u8> H264::ComposeFrame() {
memory_manager.ReadBlock(regs.picture_info_offset.Address(), &current_context,
sizeof(H264DecoderContext));
const s64 frame_number = current_context.h264_parameter_set.frame_number.Value();
if (!is_first_frame && frame_number != 0) {
frame.resize_destructive(context.stream_len);
host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
*out_configuration_size = 0;
return frame;
frame_scratch.resize_destructive(current_context.stream_len);
memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data(),
frame_scratch.size());
return frame_scratch;
}
is_first_frame = false;
// Encode header
H264BitWriter writer{};
writer.WriteU(1, 24);
@@ -53,7 +86,7 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
writer.WriteU(31, 8);
writer.WriteUe(0);
const u32 chroma_format_idc =
static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value());
static_cast<u32>(current_context.h264_parameter_set.chroma_format_idc.Value());
writer.WriteUe(chroma_format_idc);
if (chroma_format_idc == 3) {
writer.WriteBit(false);
@@ -61,42 +94,44 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
writer.WriteUe(0);
writer.WriteUe(0);
writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
writer.WriteBit(current_context.qpprime_y_zero_transform_bypass_flag.Value() != 0);
writer.WriteBit(false); // Scaling matrix present flag
writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
writer.WriteUe(
static_cast<u32>(current_context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
const auto order_cnt_type =
static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value());
static_cast<u32>(current_context.h264_parameter_set.pic_order_cnt_type.Value());
writer.WriteUe(order_cnt_type);
if (order_cnt_type == 0) {
writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
writer.WriteUe(current_context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
} else if (order_cnt_type == 1) {
writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
writer.WriteBit(current_context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
writer.WriteSe(0);
writer.WriteSe(0);
writer.WriteUe(0);
}
const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
(context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
const s32 pic_height = current_context.h264_parameter_set.frame_height_in_mbs /
(current_context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
// TODO (ameerj): Where do we get this number, it seems to be particular for each stream
const auto nvdec_decoding = Settings::values.nvdec_emulation.GetValue();
const bool uses_gpu_decoding = nvdec_decoding == Settings::NvdecEmulation::Gpu;
const u32 max_num_ref_frames = uses_gpu_decoding ? 6u : 16u;
u32 max_num_ref_frames =
std::max(std::max(current_context.h264_parameter_set.num_refidx_l0_default_active,
current_context.h264_parameter_set.num_refidx_l1_default_active) +
1,
4);
writer.WriteUe(max_num_ref_frames);
writer.WriteBit(false);
writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
writer.WriteUe(current_context.h264_parameter_set.pic_width_in_mbs - 1);
writer.WriteUe(pic_height - 1);
writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
writer.WriteBit(current_context.h264_parameter_set.frame_mbs_only_flag != 0);
if (!context.h264_parameter_set.frame_mbs_only_flag) {
writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
if (!current_context.h264_parameter_set.frame_mbs_only_flag) {
writer.WriteBit(current_context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
}
writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
writer.WriteBit(current_context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
writer.WriteBit(false); // Frame cropping flag
writer.WriteBit(false); // VUI parameter present flag
@@ -111,57 +146,59 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
writer.WriteUe(0);
writer.WriteUe(0);
writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
writer.WriteBit(context.h264_parameter_set.pic_order_present_flag != 0);
writer.WriteBit(current_context.h264_parameter_set.entropy_coding_mode_flag != 0);
writer.WriteBit(current_context.h264_parameter_set.pic_order_present_flag != 0);
writer.WriteUe(0);
writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0);
writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2);
s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value());
writer.WriteUe(current_context.h264_parameter_set.num_refidx_l0_default_active);
writer.WriteUe(current_context.h264_parameter_set.num_refidx_l1_default_active);
writer.WriteBit(current_context.h264_parameter_set.flags.weighted_pred.Value() != 0);
writer.WriteU(static_cast<s32>(current_context.h264_parameter_set.weighted_bipred_idc.Value()),
2);
s32 pic_init_qp =
static_cast<s32>(current_context.h264_parameter_set.pic_init_qp_minus26.Value());
writer.WriteSe(pic_init_qp);
writer.WriteSe(0);
s32 chroma_qp_index_offset =
static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value());
static_cast<s32>(current_context.h264_parameter_set.chroma_qp_index_offset.Value());
writer.WriteSe(chroma_qp_index_offset);
writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
writer.WriteBit(current_context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
writer.WriteBit(current_context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
writer.WriteBit(current_context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
writer.WriteBit(current_context.h264_parameter_set.transform_8x8_mode_flag != 0);
writer.WriteBit(true); // pic_scaling_matrix_present_flag
for (s32 index = 0; index < 6; index++) {
writer.WriteBit(true);
std::span<const u8> matrix{context.weight_scale};
writer.WriteScalingList(scan, matrix, index * 16, 16);
std::span<const u8> matrix{current_context.weight_scale_4x4};
writer.WriteScalingList(scan_scratch, matrix, index * 16, 16);
}
if (context.h264_parameter_set.transform_8x8_mode_flag) {
if (current_context.h264_parameter_set.transform_8x8_mode_flag) {
for (s32 index = 0; index < 2; index++) {
writer.WriteBit(true);
std::span<const u8> matrix{context.weight_scale_8x8};
writer.WriteScalingList(scan, matrix, index * 64, 64);
std::span<const u8> matrix{current_context.weight_scale_8x8};
writer.WriteScalingList(scan_scratch, matrix, index * 64, 64);
}
}
s32 chroma_qp_index_offset2 =
static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value());
static_cast<s32>(current_context.h264_parameter_set.second_chroma_qp_index_offset.Value());
writer.WriteSe(chroma_qp_index_offset2);
writer.End();
const auto& encoded_header = writer.GetByteArray();
frame.resize(encoded_header.size() + context.stream_len);
std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
frame_scratch.resize(encoded_header.size() + current_context.stream_len);
std::memcpy(frame_scratch.data(), encoded_header.data(), encoded_header.size());
*out_configuration_size = encoded_header.size();
host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data() + encoded_header.size(),
context.stream_len);
memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(),
frame_scratch.data() + encoded_header.size(),
current_context.stream_len);
return frame;
return frame_scratch;
}
H264BitWriter::H264BitWriter() = default;
@@ -278,4 +315,4 @@ void H264BitWriter::Flush() {
buffer = 0;
buffer_pos = 0;
}
} // namespace Tegra::Decoder
} // namespace Tegra::Decoders


@@ -10,6 +10,7 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/scratch_buffer.h"
#include "video_core/host1x/codecs/decoder.h"
#include "video_core/host1x/nvdec_common.h"
namespace Tegra {
@@ -18,7 +19,7 @@ namespace Host1x {
class Host1x;
} // namespace Host1x
namespace Decoder {
namespace Decoders {
class H264BitWriter {
public:
@@ -60,123 +61,213 @@ private:
std::vector<u8> byte_array;
};
class H264 {
public:
explicit H264(Host1x::Host1x& host1x);
~H264();
/// Compose the H264 frame for FFmpeg decoding
[[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
size_t* out_configuration_size,
bool is_first_frame = false);
struct Offset {
constexpr u32 Address() const noexcept {
return offset << 8;
}
private:
Common::ScratchBuffer<u8> frame;
Common::ScratchBuffer<u8> scan;
Host1x::Host1x& host1x;
u32 offset;
};
static_assert(std::is_trivial_v<Offset>, "Offset must be trivial");
static_assert(sizeof(Offset) == 0x4, "Offset has the wrong size!");
struct H264ParameterSet {
s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
s32 delta_pic_order_always_zero_flag; ///< 0x04
s32 frame_mbs_only_flag; ///< 0x08
u32 pic_width_in_mbs; ///< 0x0C
u32 frame_height_in_map_units; ///< 0x10
union { ///< 0x14
BitField<0, 2, u32> tile_format;
BitField<2, 3, u32> gob_height;
};
u32 entropy_coding_mode_flag; ///< 0x18
s32 pic_order_present_flag; ///< 0x1C
s32 num_refidx_l0_default_active; ///< 0x20
s32 num_refidx_l1_default_active; ///< 0x24
s32 deblocking_filter_control_present_flag; ///< 0x28
s32 redundant_pic_cnt_present_flag; ///< 0x2C
u32 transform_8x8_mode_flag; ///< 0x30
u32 pitch_luma; ///< 0x34
u32 pitch_chroma; ///< 0x38
u32 luma_top_offset; ///< 0x3C
u32 luma_bot_offset; ///< 0x40
u32 luma_frame_offset; ///< 0x44
u32 chroma_top_offset; ///< 0x48
u32 chroma_bot_offset; ///< 0x4C
u32 chroma_frame_offset; ///< 0x50
u32 hist_buffer_size; ///< 0x54
union { ///< 0x58
union {
BitField<0, 1, u64> mbaff_frame;
BitField<1, 1, u64> direct_8x8_inference;
BitField<2, 1, u64> weighted_pred;
BitField<3, 1, u64> constrained_intra_pred;
BitField<4, 1, u64> ref_pic;
BitField<5, 1, u64> field_pic;
BitField<6, 1, u64> bottom_field;
BitField<7, 1, u64> second_field;
} flags;
BitField<8, 4, u64> log2_max_frame_num_minus4;
BitField<12, 2, u64> chroma_format_idc;
BitField<14, 2, u64> pic_order_cnt_type;
BitField<16, 6, s64> pic_init_qp_minus26;
BitField<22, 5, s64> chroma_qp_index_offset;
BitField<27, 5, s64> second_chroma_qp_index_offset;
BitField<32, 2, u64> weighted_bipred_idc;
BitField<34, 7, u64> curr_pic_idx;
BitField<41, 5, u64> curr_col_idx;
BitField<46, 16, u64> frame_number;
BitField<62, 1, u64> frame_surfaces;
BitField<63, 1, u64> output_memory_layout;
};
struct H264ParameterSet {
s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
s32 delta_pic_order_always_zero_flag; ///< 0x04
s32 frame_mbs_only_flag; ///< 0x08
u32 pic_width_in_mbs; ///< 0x0C
u32 frame_height_in_mbs; ///< 0x10
union { ///< 0x14
BitField<0, 2, u32> tile_format;
BitField<2, 3, u32> gob_height;
BitField<5, 27, u32> reserved_surface_format;
};
static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
struct H264DecoderContext {
INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000
u32 stream_len; ///< 0x0048
INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C
H264ParameterSet h264_parameter_set; ///< 0x0058
INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8
std::array<u8, 0x60> weight_scale; ///< 0x01C0
std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
u32 entropy_coding_mode_flag; ///< 0x18
s32 pic_order_present_flag; ///< 0x1C
s32 num_refidx_l0_default_active; ///< 0x20
s32 num_refidx_l1_default_active; ///< 0x24
s32 deblocking_filter_control_present_flag; ///< 0x28
s32 redundant_pic_cnt_present_flag; ///< 0x2C
u32 transform_8x8_mode_flag; ///< 0x30
u32 pitch_luma; ///< 0x34
u32 pitch_chroma; ///< 0x38
Offset luma_top_offset; ///< 0x3C
Offset luma_bot_offset; ///< 0x40
Offset luma_frame_offset; ///< 0x44
Offset chroma_top_offset; ///< 0x48
Offset chroma_bot_offset; ///< 0x4C
Offset chroma_frame_offset; ///< 0x50
u32 hist_buffer_size; ///< 0x54
union { ///< 0x58
union {
BitField<0, 1, u64> mbaff_frame;
BitField<1, 1, u64> direct_8x8_inference;
BitField<2, 1, u64> weighted_pred;
BitField<3, 1, u64> constrained_intra_pred;
BitField<4, 1, u64> ref_pic;
BitField<5, 1, u64> field_pic;
BitField<6, 1, u64> bottom_field;
BitField<7, 1, u64> second_field;
} flags;
BitField<8, 4, u64> log2_max_frame_num_minus4;
BitField<12, 2, u64> chroma_format_idc;
BitField<14, 2, u64> pic_order_cnt_type;
BitField<16, 6, s64> pic_init_qp_minus26;
BitField<22, 5, s64> chroma_qp_index_offset;
BitField<27, 5, s64> second_chroma_qp_index_offset;
BitField<32, 2, u64> weighted_bipred_idc;
BitField<34, 7, u64> curr_pic_idx;
BitField<41, 5, u64> curr_col_idx;
BitField<46, 16, u64> frame_number;
BitField<62, 1, u64> frame_surfaces;
BitField<63, 1, u64> output_memory_layout;
};
static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size");
};
static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
#define ASSERT_POSITION(field_name, position) \
static_assert(offsetof(H264ParameterSet, field_name) == position, \
"Field " #field_name " has invalid position")
ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
ASSERT_POSITION(frame_mbs_only_flag, 0x08);
ASSERT_POSITION(pic_width_in_mbs, 0x0C);
ASSERT_POSITION(frame_height_in_map_units, 0x10);
ASSERT_POSITION(tile_format, 0x14);
ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
ASSERT_POSITION(pic_order_present_flag, 0x1C);
ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
ASSERT_POSITION(pitch_luma, 0x34);
ASSERT_POSITION(pitch_chroma, 0x38);
ASSERT_POSITION(luma_top_offset, 0x3C);
ASSERT_POSITION(luma_bot_offset, 0x40);
ASSERT_POSITION(luma_frame_offset, 0x44);
ASSERT_POSITION(chroma_top_offset, 0x48);
ASSERT_POSITION(chroma_bot_offset, 0x4C);
ASSERT_POSITION(chroma_frame_offset, 0x50);
ASSERT_POSITION(hist_buffer_size, 0x54);
ASSERT_POSITION(flags, 0x58);
ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
ASSERT_POSITION(frame_mbs_only_flag, 0x08);
ASSERT_POSITION(pic_width_in_mbs, 0x0C);
ASSERT_POSITION(frame_height_in_mbs, 0x10);
ASSERT_POSITION(tile_format, 0x14);
ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
ASSERT_POSITION(pic_order_present_flag, 0x1C);
ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
ASSERT_POSITION(pitch_luma, 0x34);
ASSERT_POSITION(pitch_chroma, 0x38);
ASSERT_POSITION(luma_top_offset, 0x3C);
ASSERT_POSITION(luma_bot_offset, 0x40);
ASSERT_POSITION(luma_frame_offset, 0x44);
ASSERT_POSITION(chroma_top_offset, 0x48);
ASSERT_POSITION(chroma_bot_offset, 0x4C);
ASSERT_POSITION(chroma_frame_offset, 0x50);
ASSERT_POSITION(hist_buffer_size, 0x54);
ASSERT_POSITION(flags, 0x58);
#undef ASSERT_POSITION
struct DpbEntry {
union {
BitField<0, 7, u32> index;
BitField<7, 5, u32> col_idx;
BitField<12, 2, u32> state;
BitField<14, 1, u32> is_long_term;
BitField<15, 1, u32> non_existing;
BitField<16, 1, u32> is_field;
BitField<17, 4, u32> top_field_marking;
BitField<21, 4, u32> bottom_field_marking;
BitField<25, 1, u32> output_memory_layout;
BitField<26, 6, u32> reserved;
} flags;
std::array<u32, 2> field_order_cnt;
u32 frame_idx;
};
static_assert(sizeof(DpbEntry) == 0x10, "DpbEntry has the wrong size!");
struct DisplayParam {
union {
BitField<0, 1, u32> enable_tf_output;
BitField<1, 1, u32> vc1_map_y_flag;
BitField<2, 3, u32> map_y_value;
BitField<5, 1, u32> vc1_map_uv_flag;
BitField<6, 3, u32> map_uv_value;
BitField<9, 8, u32> out_stride;
BitField<17, 3, u32> tiling_format;
BitField<20, 1, u32> output_structure; // 0=frame, 1=field
BitField<21, 11, u32> reserved0;
};
std::array<s32, 2> output_top;
std::array<s32, 2> output_bottom;
union {
BitField<0, 1, u32> enable_histogram;
BitField<1, 12, u32> histogram_start_x;
BitField<13, 12, u32> histogram_start_y;
BitField<25, 7, u32> reserved1;
};
union {
BitField<0, 12, u32> histogram_end_x;
BitField<12, 12, u32> histogram_end_y;
BitField<24, 8, u32> reserved2;
};
};
static_assert(sizeof(DisplayParam) == 0x1C, "DisplayParam has the wrong size!");
struct H264DecoderContext {
INSERT_PADDING_WORDS_NOINIT(13); ///< 0x0000
std::array<u8, 16> eos; ///< 0x0034
u8 explicit_eos_present_flag; ///< 0x0044
u8 hint_dump_en; ///< 0x0045
INSERT_PADDING_BYTES_NOINIT(2); ///< 0x0046
u32 stream_len; ///< 0x0048
u32 slice_count; ///< 0x004C
u32 mbhist_buffer_size; ///< 0x0050
u32 gptimer_timeout_value; ///< 0x0054
H264ParameterSet h264_parameter_set; ///< 0x0058
std::array<s32, 2> curr_field_order_cnt; ///< 0x00B8
std::array<DpbEntry, 16> dpb; ///< 0x00C0
std::array<u8, 0x60> weight_scale_4x4; ///< 0x01C0
std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
std::array<u8, 2> num_inter_view_refs_lX; ///< 0x02A0
std::array<u8, 14> reserved2; ///< 0x02A2
std::array<std::array<s8, 16>, 2> inter_view_refidx_lX; ///< 0x02B0
union { ///< 0x02D0
BitField<0, 1, u32> lossless_ipred8x8_filter_enable;
BitField<1, 1, u32> qpprime_y_zero_transform_bypass_flag;
BitField<2, 30, u32> reserved3;
};
DisplayParam display_param; ///< 0x02D4
std::array<u32, 3> reserved4; ///< 0x02F0
};
static_assert(sizeof(H264DecoderContext) == 0x2FC, "H264DecoderContext is an invalid size");
#define ASSERT_POSITION(field_name, position) \
static_assert(offsetof(H264DecoderContext, field_name) == position, \
"Field " #field_name " has invalid position")
ASSERT_POSITION(stream_len, 0x48);
ASSERT_POSITION(h264_parameter_set, 0x58);
ASSERT_POSITION(weight_scale, 0x1C0);
ASSERT_POSITION(stream_len, 0x48);
ASSERT_POSITION(h264_parameter_set, 0x58);
ASSERT_POSITION(dpb, 0xC0);
ASSERT_POSITION(weight_scale_4x4, 0x1C0);
#undef ASSERT_POSITION
class H264 final : public Decoder {
public:
explicit H264(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
Host1x::FrameQueue& frame_queue);
~H264() override;
H264(const H264&) = delete;
H264& operator=(const H264&) = delete;
H264(H264&&) = delete;
H264& operator=(H264&&) = delete;
/// Compose the H264 frame for FFmpeg decoding
[[nodiscard]] std::span<const u8> ComposeFrame() override;
std::tuple<u64, u64> GetProgressiveOffsets() override;
std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() override;
bool IsInterlaced() override;
std::string_view GetCurrentCodecName() const override {
return "H264";
}
private:
bool is_first_frame{true};
Common::ScratchBuffer<u8> frame_scratch;
Common::ScratchBuffer<u8> scan_scratch;
H264DecoderContext current_context{};
};
} // namespace Decoder
} // namespace Decoders
} // namespace Tegra
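The Offset wrapper introduced above stores NVDEC surface addresses in 256-byte units, so GetProgressiveOffsets and GetInterlacedOffsets can simply add the surface base and the per-frame offset once both have been expanded with Address(). A tiny sketch of that arithmetic (illustrative values only):

```cpp
#include <cstdint>

// Mirrors the Offset wrapper above: stored values are in 256-byte units.
struct Offset {
    constexpr std::uint32_t Address() const noexcept { return offset << 8; }
    std::uint32_t offset;
};

// e.g. a surface base stored as 0x1234 plus a luma frame offset stored as 0x10:
static_assert(Offset{0x1234}.Address() + Offset{0x10}.Address() == 0x123400 + 0x1000);
```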


@@ -7,47 +7,70 @@
#include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h"
namespace Tegra::Decoder {
VP8::VP8(Host1x::Host1x& host1x_) : host1x{host1x_} {}
namespace Tegra::Decoders {
VP8::VP8(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
Host1x::FrameQueue& frame_queue_)
: Decoder{host1x_, id_, regs_, frame_queue_} {
codec = Host1x::NvdecCommon::VideoCodec::VP8;
initialized = decode_api.Initialize(codec);
}
VP8::~VP8() = default;
std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
VP8PictureInfo info;
host1x.GMMU().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
std::tuple<u64, u64> VP8::GetProgressiveOffsets() {
auto luma{regs.surface_luma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
auto chroma{regs.surface_chroma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
return {luma, chroma};
}
const bool is_key_frame = info.key_frame == 1u;
const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size);
std::tuple<u64, u64, u64, u64> VP8::GetInterlacedOffsets() {
auto luma_top{regs.surface_luma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
auto luma_bottom{
regs.surface_luma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
auto chroma_top{
regs.surface_chroma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
auto chroma_bottom{
regs.surface_chroma_offsets[static_cast<u32>(Vp8SurfaceIndex::Current)].Address()};
return {luma_top, luma_bottom, chroma_top, chroma_bottom};
}
std::span<const u8> VP8::ComposeFrame() {
memory_manager.ReadBlock(regs.picture_info_offset.Address(), &current_context,
sizeof(VP8PictureInfo));
const bool is_key_frame = current_context.key_frame == 1u;
const auto bitstream_size = static_cast<size_t>(current_context.vld_buffer_size);
const size_t header_size = is_key_frame ? 10u : 3u;
frame.resize(header_size + bitstream_size);
frame_scratch.resize(header_size + bitstream_size);
// Based on page 30 of the VP8 specification.
// https://datatracker.ietf.org/doc/rfc6386/
frame[0] = is_key_frame ? 0u : 1u; // 1-bit frame type (0: keyframe, 1: interframes).
frame[0] |= static_cast<u8>((info.version & 7u) << 1u); // 3-bit version number
frame[0] |= static_cast<u8>(1u << 4u); // 1-bit show_frame flag
frame_scratch[0] = is_key_frame ? 0u : 1u; // 1-bit frame type (0: keyframe, 1: interframes).
frame_scratch[0] |=
static_cast<u8>((current_context.version & 7u) << 1u); // 3-bit version number
frame_scratch[0] |= static_cast<u8>(1u << 4u); // 1-bit show_frame flag
// The next 19-bits are the first partition size
frame[0] |= static_cast<u8>((info.first_part_size & 7u) << 5u);
frame[1] = static_cast<u8>((info.first_part_size & 0x7f8u) >> 3u);
frame[2] = static_cast<u8>((info.first_part_size & 0x7f800u) >> 11u);
frame_scratch[0] |= static_cast<u8>((current_context.first_part_size & 7u) << 5u);
frame_scratch[1] = static_cast<u8>((current_context.first_part_size & 0x7f8u) >> 3u);
frame_scratch[2] = static_cast<u8>((current_context.first_part_size & 0x7f800u) >> 11u);
if (is_key_frame) {
frame[3] = 0x9du;
frame[4] = 0x01u;
frame[5] = 0x2au;
frame_scratch[3] = 0x9du;
frame_scratch[4] = 0x01u;
frame_scratch[5] = 0x2au;
// TODO(ameerj): Horizontal/Vertical Scale
// 16 bits: (2 bits Horizontal Scale << 14) | Width (14 bits)
frame[6] = static_cast<u8>(info.frame_width & 0xff);
frame[7] = static_cast<u8>(((info.frame_width >> 8) & 0x3f));
frame_scratch[6] = static_cast<u8>(current_context.frame_width & 0xff);
frame_scratch[7] = static_cast<u8>(((current_context.frame_width >> 8) & 0x3f));
// 16 bits: (2 bits Vertical Scale << 14) | Height (14 bits)
frame[8] = static_cast<u8>(info.frame_height & 0xff);
frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f));
frame_scratch[8] = static_cast<u8>(current_context.frame_height & 0xff);
frame_scratch[9] = static_cast<u8>(((current_context.frame_height >> 8) & 0x3f));
}
const u64 bitstream_offset = state.frame_bitstream_offset;
host1x.GMMU().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size);
const u64 bitstream_offset = regs.frame_bitstream_offset.Address();
memory_manager.ReadBlock(bitstream_offset, frame_scratch.data() + header_size, bitstream_size);
return frame;
return frame_scratch;
}
} // namespace Tegra::Decoder
} // namespace Tegra::Decoders
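The header bytes written in ComposeFrame above follow the uncompressed data chunk ("frame tag") from RFC 6386. A self-contained sketch of that 3-byte packing may make the bit layout easier to follow; PackVp8FrameTag is a hypothetical helper, not part of this change:

#include <array>
#include <cstdint>

// Packs the 3-byte VP8 frame tag: frame type, version, show_frame, and the
// 19-bit first partition size (RFC 6386, section 9.1).
std::array<std::uint8_t, 3> PackVp8FrameTag(bool is_key_frame, std::uint8_t version,
                                            std::uint32_t first_part_size) {
    std::array<std::uint8_t, 3> tag{};
    tag[0] = is_key_frame ? 0u : 1u;                                   // bit 0: 0 = key frame
    tag[0] |= static_cast<std::uint8_t>((version & 7u) << 1u);         // bits 1-3: version
    tag[0] |= static_cast<std::uint8_t>(1u << 4u);                     // bit 4: show_frame
    tag[0] |= static_cast<std::uint8_t>((first_part_size & 7u) << 5u); // bits 5-23: partition size
    tag[1] = static_cast<std::uint8_t>((first_part_size >> 3) & 0xffu);
    tag[2] = static_cast<std::uint8_t>((first_part_size >> 11) & 0xffu);
    return tag;
}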

View File

@@ -9,6 +9,7 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/scratch_buffer.h"
#include "video_core/host1x/codecs/decoder.h"
#include "video_core/host1x/nvdec_common.h"
namespace Tegra {
@@ -17,20 +18,41 @@ namespace Host1x {
class Host1x;
} // namespace Host1x
namespace Decoder {
namespace Decoders {
enum class Vp8SurfaceIndex : u32 {
Last = 0,
Golden = 1,
AltRef = 2,
Current = 3,
};
class VP8 {
class VP8 final : public Decoder {
public:
explicit VP8(Host1x::Host1x& host1x);
~VP8();
explicit VP8(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
Host1x::FrameQueue& frame_queue);
~VP8() override;
/// Compose the VP8 frame for FFmpeg decoding
[[nodiscard]] std::span<const u8> ComposeFrame(
const Host1x::NvdecCommon::NvdecRegisters& state);
VP8(const VP8&) = delete;
VP8& operator=(const VP8&) = delete;
VP8(VP8&&) = delete;
VP8& operator=(VP8&&) = delete;
[[nodiscard]] std::span<const u8> ComposeFrame() override;
std::tuple<u64, u64> GetProgressiveOffsets() override;
std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() override;
bool IsInterlaced() override {
return false;
}
std::string_view GetCurrentCodecName() const override {
return "VP8";
}
private:
Common::ScratchBuffer<u8> frame;
Host1x::Host1x& host1x;
Common::ScratchBuffer<u8> frame_scratch;
struct VP8PictureInfo {
INSERT_PADDING_WORDS_NOINIT(14);
@@ -73,7 +95,9 @@ private:
INSERT_PADDING_WORDS_NOINIT(3);
};
static_assert(sizeof(VP8PictureInfo) == 0xc0, "PictureInfo is an invalid size");
VP8PictureInfo current_context{};
};
} // namespace Decoder
} // namespace Decoders
} // namespace Tegra

View File

@@ -4,12 +4,13 @@
#include <algorithm> // for std::copy
#include <numeric>
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/host1x/codecs/vp9.h"
#include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h"
namespace Tegra::Decoder {
namespace Tegra::Decoders {
namespace {
constexpr u32 diff_update_probability = 252;
constexpr u32 frame_sync_code = 0x498342;
@@ -237,7 +238,12 @@ constexpr std::array<u8, 254> map_lut{
}
} // Anonymous namespace
VP9::VP9(Host1x::Host1x& host1x_) : host1x{host1x_} {}
VP9::VP9(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_,
Host1x::FrameQueue& frame_queue_)
: Decoder{host1x_, id_, regs_, frame_queue_} {
codec = Host1x::NvdecCommon::VideoCodec::VP9;
initialized = decode_api.Initialize(codec);
}
VP9::~VP9() = default;
@@ -356,35 +362,113 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
}
}
Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) {
PictureInfo picture_info;
host1x.GMMU().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
Vp9PictureInfo vp9_info = picture_info.Convert();
void VP9::WriteSegmentation(VpxBitStreamWriter& writer) {
bool enabled = current_picture_info.segmentation.enabled != 0;
writer.WriteBit(enabled);
if (!enabled) {
return;
}
InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
auto update_map = current_picture_info.segmentation.update_map != 0;
writer.WriteBit(update_map);
if (update_map) {
EntropyProbs entropy_probs{};
memory_manager.ReadBlock(regs.vp9_prob_tab_buffer_offset.Address(), &entropy_probs,
sizeof(entropy_probs));
auto WriteProb = [&](u8 prob) {
bool coded = prob != 255;
writer.WriteBit(coded);
if (coded) {
writer.WriteU(prob, 8);
}
};
for (size_t i = 0; i < entropy_probs.mb_segment_tree_probs.size(); i++) {
WriteProb(entropy_probs.mb_segment_tree_probs[i]);
}
auto temporal_update = current_picture_info.segmentation.temporal_update != 0;
writer.WriteBit(temporal_update);
if (temporal_update) {
for (s32 i = 0; i < 3; i++) {
WriteProb(entropy_probs.segment_pred_probs[i]);
}
}
}
if (last_segmentation == current_picture_info.segmentation) {
writer.WriteBit(false);
return;
}
last_segmentation = current_picture_info.segmentation;
writer.WriteBit(true);
writer.WriteBit(current_picture_info.segmentation.abs_delta != 0);
constexpr s32 MAX_SEGMENTS = 8;
constexpr std::array SegmentationFeatureBits = {8, 6, 2, 0};
for (s32 i = 0; i < MAX_SEGMENTS; i++) {
auto q_enabled = current_picture_info.segmentation.feature_enabled[i][0] != 0;
writer.WriteBit(q_enabled);
if (q_enabled) {
writer.WriteS(current_picture_info.segmentation.feature_data[i][0],
SegmentationFeatureBits[0]);
}
auto lf_enabled = current_picture_info.segmentation.feature_enabled[i][1] != 0;
writer.WriteBit(lf_enabled);
if (lf_enabled) {
writer.WriteS(current_picture_info.segmentation.feature_data[i][1],
SegmentationFeatureBits[1]);
}
auto ref_enabled = current_picture_info.segmentation.feature_enabled[i][2] != 0;
writer.WriteBit(ref_enabled);
if (ref_enabled) {
writer.WriteU(current_picture_info.segmentation.feature_data[i][2],
SegmentationFeatureBits[2]);
}
auto skip_enabled = current_picture_info.segmentation.feature_enabled[i][3] != 0;
writer.WriteBit(skip_enabled);
}
}
Vp9PictureInfo VP9::GetVp9PictureInfo() {
memory_manager.ReadBlock(regs.picture_info_offset.Address(), &current_picture_info,
sizeof(PictureInfo));
Vp9PictureInfo vp9_info = current_picture_info.Convert();
InsertEntropy(regs.vp9_prob_tab_buffer_offset.Address(), vp9_info.entropy);
// surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
// order: last, golden, altref, current.
std::copy(state.surface_luma_offset.begin(), state.surface_luma_offset.begin() + 4,
vp9_info.frame_offsets.begin());
for (size_t i = 0; i < 4; i++) {
vp9_info.frame_offsets[i] = regs.surface_luma_offsets[i].Address();
}
return vp9_info;
}
void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
EntropyProbs entropy;
host1x.GMMU().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
memory_manager.ReadBlock(offset, &entropy, sizeof(EntropyProbs));
entropy.Convert(dst);
}
Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
Vp9FrameContainer VP9::GetCurrentFrame() {
Vp9FrameContainer current_frame{};
{
// gpu.SyncGuestHost(); epic, why?
current_frame.info = GetVp9PictureInfo(state);
current_frame.info = GetVp9PictureInfo();
current_frame.bit_stream.resize(current_frame.info.bitstream_size);
host1x.GMMU().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
current_frame.info.bitstream_size);
memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(),
current_frame.bit_stream.data(),
current_frame.info.bitstream_size);
}
if (!next_frame.bit_stream.empty()) {
Vp9FrameContainer temp{
@@ -742,8 +826,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);
ASSERT(!current_frame_info.segment_enabled);
uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).
WriteSegmentation(uncomp_writer);
const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);
const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width);
@@ -770,10 +853,29 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
return uncomp_writer;
}
void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
std::tuple<u64, u64> VP9::GetProgressiveOffsets() {
auto luma{regs.surface_luma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
auto chroma{regs.surface_chroma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
return {luma, chroma};
}
std::tuple<u64, u64, u64, u64> VP9::GetInterlacedOffsets() {
auto luma_top{regs.surface_luma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
auto luma_bottom{
regs.surface_luma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
auto chroma_top{
regs.surface_chroma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
auto chroma_bottom{
regs.surface_chroma_offsets[static_cast<u32>(Vp9SurfaceIndex::Current)].Address()};
return {luma_top, luma_bottom, chroma_top, chroma_bottom};
}
std::span<const u8> VP9::ComposeFrame() {
vp9_hidden_frame = false;
std::vector<u8> bitstream;
{
Vp9FrameContainer curr_frame = GetCurrentFrame(state);
Vp9FrameContainer curr_frame = GetCurrentFrame();
current_frame_info = curr_frame.info;
bitstream = std::move(curr_frame.bit_stream);
}
@@ -786,12 +888,16 @@ void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray();
// Write headers and frame to buffer
frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size());
std::copy(uncompressed_header.begin(), uncompressed_header.end(), frame.begin());
frame_scratch.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size());
std::copy(uncompressed_header.begin(), uncompressed_header.end(), frame_scratch.begin());
std::copy(compressed_header.begin(), compressed_header.end(),
frame.begin() + uncompressed_header.size());
frame_scratch.begin() + uncompressed_header.size());
std::copy(bitstream.begin(), bitstream.end(),
frame.begin() + uncompressed_header.size() + compressed_header.size());
frame_scratch.begin() + uncompressed_header.size() + compressed_header.size());
vp9_hidden_frame = WasFrameHidden();
return GetFrameBytes();
}
VpxRangeEncoder::VpxRangeEncoder() {
@@ -944,4 +1050,4 @@ const std::vector<u8>& VpxBitStreamWriter::GetByteArray() const {
return byte_array;
}
} // namespace Tegra::Decoder
} // namespace Tegra::Decoders
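The new WriteSegmentation path follows the VP9 uncompressed-header syntax for segmentation: every probability is preceded by a one-bit "coded" flag (255 means "keep the default, write nothing"), and each per-segment feature writes an enable bit followed by a fixed-width value. The feature widths are, in order, an 8-bit signed quantizer delta, a 6-bit signed loop filter delta, a 2-bit reference frame index, and a data-less skip flag. A minimal sketch of the probability pattern, assuming the VpxBitStreamWriter interface from this file:

// Sketch only; `writer` is assumed to be the VpxBitStreamWriter defined above.
void WriteCodedProb(VpxBitStreamWriter& writer, u8 prob) {
    const bool coded = prob != 255;   // 255 = not coded, the decoder keeps its default
    writer.WriteBit(coded);
    if (coded) {
        writer.WriteU(prob, 8);
    }
}
// Per-segment feature widths used above: {quantizer: 8 signed, loop filter: 6 signed,
// reference frame: 2 unsigned, skip: 0 (enable bit only)}.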

View File

@@ -10,6 +10,7 @@
#include "common/common_types.h"
#include "common/scratch_buffer.h"
#include "common/stream.h"
#include "video_core/host1x/codecs/decoder.h"
#include "video_core/host1x/codecs/vp9_types.h"
#include "video_core/host1x/nvdec_common.h"
@@ -19,7 +20,7 @@ namespace Host1x {
class Host1x;
} // namespace Host1x
namespace Decoder {
namespace Decoders {
/// The VpxRangeEncoder and VpxBitStreamWriter classes are used to compose the
/// VP9 header bitstreams.
@@ -110,21 +111,32 @@ private:
std::vector<u8> byte_array;
};
class VP9 {
class VP9 final : public Decoder {
public:
explicit VP9(Host1x::Host1x& host1x);
~VP9();
explicit VP9(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id,
Host1x::FrameQueue& frame_queue);
~VP9() override;
VP9(const VP9&) = delete;
VP9& operator=(const VP9&) = delete;
VP9(VP9&&) = default;
VP9(VP9&&) = delete;
VP9& operator=(VP9&&) = delete;
/// Composes the VP9 frame from the GPU state information.
/// Based on the official VP9 spec documentation
void ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state);
[[nodiscard]] std::span<const u8> ComposeFrame() override;
std::tuple<u64, u64> GetProgressiveOffsets() override;
std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() override;
bool IsInterlaced() override {
return false;
}
std::string_view GetCurrentCodecName() const override {
return "VP9";
}
private:
/// Returns true if the most recent frame was a hidden frame.
[[nodiscard]] bool WasFrameHidden() const {
return !current_frame_info.show_frame;
@@ -132,10 +144,9 @@ public:
/// Returns a const span to the composed frame data.
[[nodiscard]] std::span<const u8> GetFrameBytes() const {
return frame;
return frame_scratch;
}
private:
/// Generates compressed header probability updates in the bitstream writer
template <typename T, std::size_t N>
void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
@@ -167,23 +178,22 @@ private:
/// Write motion vector probability updates. 6.3.17 in the spec
void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
void WriteSegmentation(VpxBitStreamWriter& writer);
/// Returns VP9 information from NVDEC provided offset and size
[[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(
const Host1x::NvdecCommon::NvdecRegisters& state);
[[nodiscard]] Vp9PictureInfo GetVp9PictureInfo();
/// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
/// Returns frame to be decoded after buffering
[[nodiscard]] Vp9FrameContainer GetCurrentFrame(
const Host1x::NvdecCommon::NvdecRegisters& state);
[[nodiscard]] Vp9FrameContainer GetCurrentFrame();
/// Use NVDEC provided information to compose the headers for the current frame
[[nodiscard]] std::vector<u8> ComposeCompressedHeader();
[[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
Host1x::Host1x& host1x;
Common::ScratchBuffer<u8> frame;
Common::ScratchBuffer<u8> frame_scratch;
std::array<s8, 4> loop_filter_ref_deltas{};
std::array<s8, 2> loop_filter_mode_deltas{};
@@ -192,9 +202,11 @@ private:
std::array<Vp9EntropyProbs, 4> frame_ctxs{};
bool swap_ref_indices{};
Segmentation last_segmentation{};
PictureInfo current_picture_info{};
Vp9PictureInfo current_frame_info{};
Vp9EntropyProbs prev_frame_probs{};
};
} // namespace Decoder
} // namespace Decoders
} // namespace Tegra

View File

@@ -11,7 +11,14 @@
namespace Tegra {
namespace Decoder {
namespace Decoders {
enum class Vp9SurfaceIndex : u32 {
Last = 0,
Golden = 1,
AltRef = 2,
Current = 3,
};
struct Vp9FrameDimensions {
s16 width;
s16 height;
@@ -48,11 +55,13 @@ enum class TxMode {
};
struct Segmentation {
constexpr bool operator==(const Segmentation& rhs) const = default;
u8 enabled;
u8 update_map;
u8 temporal_update;
u8 abs_delta;
std::array<u32, 8> feature_mask;
std::array<std::array<u8, 4>, 8> feature_enabled;
std::array<std::array<s16, 4>, 8> feature_data;
};
static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
@@ -190,7 +199,17 @@ struct PictureInfo {
static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
struct EntropyProbs {
INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000
std::array<u8, 10 * 10 * 8> kf_bmode_prob; ///< 0x0000
std::array<u8, 10 * 10 * 1> kf_bmode_probB; ///< 0x0320
std::array<u8, 3> ref_pred_probs; ///< 0x0384
std::array<u8, 7> mb_segment_tree_probs; ///< 0x0387
std::array<u8, 3> segment_pred_probs; ///< 0x038E
std::array<u8, 4> ref_scores; ///< 0x0391
std::array<u8, 2> prob_comppred; ///< 0x0395
INSERT_PADDING_BYTES_NOINIT(9); ///< 0x0397
std::array<u8, 10 * 8> kf_uv_mode_prob; ///< 0x03A0
std::array<u8, 10 * 1> kf_uv_mode_probB; ///< 0x03F0
INSERT_PADDING_BYTES_NOINIT(6); ///< 0x03FA
std::array<u8, 28> inter_mode_prob; ///< 0x0400
std::array<u8, 4> intra_inter_prob; ///< 0x041C
INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420
@@ -302,5 +321,5 @@ ASSERT_POSITION(class_0_fr, 0x560);
ASSERT_POSITION(coef_probs, 0x5A0);
#undef ASSERT_POSITION
}; // namespace Decoder
}; // namespace Decoders
}; // namespace Tegra

View File

@@ -27,6 +27,7 @@ void Control::ProcessMethod(Method method, u32 argument) {
}
void Control::Execute(u32 data) {
LOG_TRACE(Service_NVDRV, "Control wait syncpt {} value {}", data, syncpoint_value);
host1x.GetSyncpointManager().WaitHost(data, syncpoint_value);
}

View File

@@ -6,9 +6,7 @@
#include "common/common_types.h"
namespace Tegra {
namespace Host1x {
namespace Tegra::Host1x {
class Host1x;
class Nvdec;
@@ -31,10 +29,8 @@ private:
/// For Host1x, execute is waiting on a syncpoint previously written into the state
void Execute(u32 data);
u32 syncpoint_value{};
Host1x& host1x;
u32 syncpoint_value{};
};
} // namespace Host1x
} // namespace Tegra
} // namespace Tegra::Host1x

View File

@@ -5,7 +5,9 @@
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/host1x/ffmpeg/ffmpeg.h"
#include "video_core/memory_manager.h"
extern "C" {
#ifdef LIBVA_FOUND
@@ -149,6 +151,7 @@ bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context,
}
}
LOG_INFO(HW_GPU, "Hardware decoding is disabled due to implementation issues, using CPU.");
return false;
}
@@ -183,8 +186,8 @@ bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
return true;
}
DecoderContext::DecoderContext(const Decoder& decoder) {
m_codec_context = avcodec_alloc_context3(decoder.GetCodec());
DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} {
m_codec_context = avcodec_alloc_context3(m_decoder.GetCodec());
av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0);
m_codec_context->thread_count = 0;
m_codec_context->thread_type &= ~FF_THREAD_FRAME;
@@ -216,6 +219,25 @@ bool DecoderContext::OpenContext(const Decoder& decoder) {
}
bool DecoderContext::SendPacket(const Packet& packet) {
m_temp_frame = std::make_shared<Frame>();
m_got_frame = 0;
// Android can randomly crash when calling decode directly, so skip.
// TODO update ffmpeg and hope that fixes it.
#ifndef ANDROID
if (!m_codec_context->hw_device_ctx && m_codec_context->codec_id == AV_CODEC_ID_H264) {
m_decode_order = true;
auto* codec{ffcodec(m_decoder.GetCodec())};
if (const int ret = codec->cb.decode(m_codec_context, m_temp_frame->GetFrame(),
&m_got_frame, packet.GetPacket());
ret < 0) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", AVError(ret));
return false;
}
return true;
}
#endif
if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret));
return false;
@@ -224,139 +246,73 @@ bool DecoderContext::SendPacket(const Packet& packet) {
return true;
}
std::unique_ptr<Frame> DecoderContext::ReceiveFrame(bool* out_is_interlaced) {
auto dst_frame = std::make_unique<Frame>();
std::shared_ptr<Frame> DecoderContext::ReceiveFrame() {
// Android can randomly crash when calling decode directly, so skip.
// TODO update ffmpeg and hope that fixes it.
#ifndef ANDROID
if (!m_codec_context->hw_device_ctx && m_codec_context->codec_id == AV_CODEC_ID_H264) {
m_decode_order = true;
auto* codec{ffcodec(m_decoder.GetCodec())};
int ret{0};
const auto ReceiveImpl = [&](AVFrame* frame) {
if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret));
return false;
if (m_got_frame == 0) {
Packet packet{{}};
auto* pkt = packet.GetPacket();
pkt->data = nullptr;
pkt->size = 0;
ret = codec->cb.decode(m_codec_context, m_temp_frame->GetFrame(), &m_got_frame, pkt);
m_codec_context->has_b_frames = 0;
}
*out_is_interlaced =
#if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59
(frame->flags & AV_FRAME_FLAG_INTERLACED) != 0;
#else
frame->interlaced_frame != 0;
if (m_got_frame == 0 || ret < 0) {
LOG_ERROR(Service_NVDRV, "Failed to receive a frame! error {}", ret);
return {};
}
} else
#endif
return true;
};
{
if (m_codec_context->hw_device_ctx) {
// If we have a hardware context, make a separate frame here to receive the
// hardware result before sending it to the output.
Frame intermediate_frame;
const auto ReceiveImpl = [&](AVFrame* frame) {
if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret));
return false;
}
if (!ReceiveImpl(intermediate_frame.GetFrame())) {
return {};
}
return true;
};
dst_frame->SetFormat(PreferredGpuFormat);
if (const int ret =
av_hwframe_transfer_data(dst_frame->GetFrame(), intermediate_frame.GetFrame(), 0);
ret < 0) {
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
return {};
}
} else {
// Otherwise, decode the frame as normal.
if (!ReceiveImpl(dst_frame->GetFrame())) {
return {};
if (m_codec_context->hw_device_ctx) {
// If we have a hardware context, make a separate frame here to receive the
// hardware result before sending it to the output.
Frame intermediate_frame;
if (!ReceiveImpl(intermediate_frame.GetFrame())) {
return {};
}
m_temp_frame->SetFormat(PreferredGpuFormat);
if (const int ret = av_hwframe_transfer_data(m_temp_frame->GetFrame(),
intermediate_frame.GetFrame(), 0);
ret < 0) {
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
return {};
}
} else {
// Otherwise, decode the frame as normal.
if (!ReceiveImpl(m_temp_frame->GetFrame())) {
return {};
}
}
}
return dst_frame;
}
DeinterlaceFilter::DeinterlaceFilter(const Frame& frame) {
const AVFilter* buffer_src = avfilter_get_by_name("buffer");
const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
AVFilterInOut* inputs = avfilter_inout_alloc();
AVFilterInOut* outputs = avfilter_inout_alloc();
SCOPE_EXIT({
avfilter_inout_free(&inputs);
avfilter_inout_free(&outputs);
});
// Don't know how to get the accurate time_base but it doesn't matter for yadif filter
// so just use 1/1 to make buffer filter happy
std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame.GetWidth(),
frame.GetHeight(), static_cast<int>(frame.GetPixelFormat()));
m_filter_graph = avfilter_graph_alloc();
int ret = avfilter_graph_create_filter(&m_source_context, buffer_src, "in", args.c_str(),
nullptr, m_filter_graph);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_create_filter source error: {}", AVError(ret));
return;
}
ret = avfilter_graph_create_filter(&m_sink_context, buffer_sink, "out", nullptr, nullptr,
m_filter_graph);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_create_filter sink error: {}", AVError(ret));
return;
}
inputs->name = av_strdup("out");
inputs->filter_ctx = m_sink_context;
inputs->pad_idx = 0;
inputs->next = nullptr;
outputs->name = av_strdup("in");
outputs->filter_ctx = m_source_context;
outputs->pad_idx = 0;
outputs->next = nullptr;
const char* description = "yadif=1:-1:0";
ret = avfilter_graph_parse_ptr(m_filter_graph, description, &inputs, &outputs, nullptr);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_parse_ptr error: {}", AVError(ret));
return;
}
ret = avfilter_graph_config(m_filter_graph, nullptr);
if (ret < 0) {
LOG_ERROR(HW_GPU, "avfilter_graph_config error: {}", AVError(ret));
return;
}
m_initialized = true;
}
bool DeinterlaceFilter::AddSourceFrame(const Frame& frame) {
if (const int ret = av_buffersrc_add_frame_flags(m_source_context, frame.GetFrame(),
AV_BUFFERSRC_FLAG_KEEP_REF);
ret < 0) {
LOG_ERROR(HW_GPU, "av_buffersrc_add_frame_flags error: {}", AVError(ret));
return false;
}
return true;
}
std::unique_ptr<Frame> DeinterlaceFilter::DrainSinkFrame() {
auto dst_frame = std::make_unique<Frame>();
const int ret = av_buffersink_get_frame(m_sink_context, dst_frame->GetFrame());
if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) {
return {};
}
if (ret < 0) {
LOG_ERROR(HW_GPU, "av_buffersink_get_frame error: {}", AVError(ret));
return {};
}
return dst_frame;
}
DeinterlaceFilter::~DeinterlaceFilter() {
avfilter_graph_free(&m_filter_graph);
#if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59
m_temp_frame->GetFrame()->interlaced_frame =
(m_temp_frame->GetFrame()->flags & AV_FRAME_FLAG_INTERLACED) != 0;
#endif
return std::move(m_temp_frame);
}
void DecodeApi::Reset() {
m_deinterlace_filter.reset();
m_hardware_context.reset();
m_decoder_context.reset();
m_decoder.reset();
@@ -382,43 +338,14 @@ bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
return true;
}
bool DecodeApi::SendPacket(std::span<const u8> packet_data, size_t configuration_size) {
bool DecodeApi::SendPacket(std::span<const u8> packet_data) {
FFmpeg::Packet packet(packet_data);
return m_decoder_context->SendPacket(packet);
}
void DecodeApi::ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue) {
std::shared_ptr<Frame> DecodeApi::ReceiveFrame() {
// Receive raw frame from decoder.
bool is_interlaced;
auto frame = m_decoder_context->ReceiveFrame(&is_interlaced);
if (!frame) {
return;
}
if (!is_interlaced) {
// If the frame is not interlaced, we can pend it now.
frame_queue.push(std::move(frame));
} else {
// Create the deinterlacer if needed.
if (!m_deinterlace_filter) {
m_deinterlace_filter.emplace(*frame);
}
// Add the frame we just received.
if (!m_deinterlace_filter->AddSourceFrame(*frame)) {
return;
}
// Pend output fields.
while (true) {
auto filter_frame = m_deinterlace_filter->DrainSinkFrame();
if (!filter_frame) {
break;
}
frame_queue.push(std::move(filter_frame));
}
}
return m_decoder_context->ReceiveFrame();
}
} // namespace FFmpeg
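With the DeinterlaceFilter removed, DecodeApi now returns exactly one frame per ReceiveFrame() call and leaves field handling to the caller. A usage sketch under those assumptions (composed_frame and the surrounding control flow are illustrative, not taken from the diff):

FFmpeg::DecodeApi decode_api;
if (decode_api.Initialize(Tegra::Host1x::NvdecCommon::VideoCodec::H264)) {
    decode_api.SendPacket(composed_frame);                         // std::span<const u8>
    std::shared_ptr<FFmpeg::Frame> frame = decode_api.ReceiveFrame();
    if (frame && frame->IsInterlaced()) {
        // No yadif filter graph anymore: interlacing is flagged on the frame and
        // the VIC consumes the top/bottom fields via the decoder's field offsets.
    }
}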

View File

@@ -20,17 +20,20 @@ extern "C" {
#endif
#include <libavcodec/avcodec.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/avutil.h>
#include <libavutil/opt.h>
#ifndef ANDROID
#include <libavcodec/codec_internal.h>
#endif
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
}
namespace Tegra {
class MemoryManager;
}
namespace FFmpeg {
class Packet;
@@ -90,6 +93,10 @@ public:
return m_frame->data[plane];
}
const u8* GetPlane(int plane) const {
return m_frame->data[plane];
}
u8** GetPlanes() const {
return m_frame->data;
}
@@ -98,6 +105,14 @@ public:
m_frame->format = format;
}
bool IsInterlaced() const {
return m_frame->interlaced_frame != 0;
}
bool IsHardwareDecoded() const {
return m_frame->hw_frames_ctx != nullptr;
}
AVFrame* GetFrame() const {
return m_frame;
}
@@ -160,33 +175,22 @@ public:
void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt);
bool OpenContext(const Decoder& decoder);
bool SendPacket(const Packet& packet);
std::unique_ptr<Frame> ReceiveFrame(bool* out_is_interlaced);
std::shared_ptr<Frame> ReceiveFrame();
AVCodecContext* GetCodecContext() const {
return m_codec_context;
}
bool UsingDecodeOrder() const {
return m_decode_order;
}
private:
const Decoder& m_decoder;
AVCodecContext* m_codec_context{};
};
// Wraps an AVFilterGraph.
class DeinterlaceFilter {
public:
YUZU_NON_COPYABLE(DeinterlaceFilter);
YUZU_NON_MOVEABLE(DeinterlaceFilter);
explicit DeinterlaceFilter(const Frame& frame);
~DeinterlaceFilter();
bool AddSourceFrame(const Frame& frame);
std::unique_ptr<Frame> DrainSinkFrame();
private:
AVFilterGraph* m_filter_graph{};
AVFilterContext* m_source_context{};
AVFilterContext* m_sink_context{};
bool m_initialized{};
s32 m_got_frame{};
std::shared_ptr<Frame> m_temp_frame{};
bool m_decode_order{};
};
class DecodeApi {
@@ -200,14 +204,17 @@ public:
bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec);
void Reset();
bool SendPacket(std::span<const u8> packet_data, size_t configuration_size);
void ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue);
bool UsingDecodeOrder() const {
return m_decoder_context->UsingDecodeOrder();
}
bool SendPacket(std::span<const u8> packet_data);
std::shared_ptr<Frame> ReceiveFrame();
private:
std::optional<FFmpeg::Decoder> m_decoder;
std::optional<FFmpeg::DecoderContext> m_decoder_context;
std::optional<FFmpeg::HardwareContext> m_hardware_context;
std::optional<FFmpeg::DeinterlaceFilter> m_deinterlace_filter;
};
} // namespace FFmpeg

View File

@@ -3,10 +3,10 @@
#include "core/core.h"
#include "video_core/host1x/host1x.h"
#include "video_core/host1x/nvdec.h"
#include "video_core/host1x/vic.h"
namespace Tegra {
namespace Host1x {
namespace Tegra::Host1x {
Host1x::Host1x(Core::System& system_)
: system{system_}, syncpoint_manager{},
@@ -15,6 +15,22 @@ Host1x::Host1x(Core::System& system_)
Host1x::~Host1x() = default;
} // namespace Host1x
void Host1x::StartDevice(s32 fd, ChannelType type, u32 syncpt) {
switch (type) {
case ChannelType::NvDec:
devices[fd] = std::make_unique<Tegra::Host1x::Nvdec>(*this, fd, syncpt, frame_queue);
break;
case ChannelType::VIC:
devices[fd] = std::make_unique<Tegra::Host1x::Vic>(*this, fd, syncpt, frame_queue);
break;
default:
LOG_ERROR(HW_GPU, "Unimplemented host1x device {}", static_cast<u32>(type));
break;
}
}
} // namespace Tegra
void Host1x::StopDevice(s32 fd, ChannelType type) {
devices.erase(fd);
}
} // namespace Tegra::Host1x
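StartDevice/StopDevice let the nvhost layer create one CDmaPusher-backed device per channel fd, with PushEntries routing command lists to it. A rough lifecycle sketch (the fd and syncpoint values, and the entries variable, are made up for illustration):

host1x.StartDevice(/*fd=*/3, Tegra::Host1x::ChannelType::NvDec, /*syncpt=*/5);
host1x.PushEntries(3, std::move(entries));   // forwarded to the per-fd CDmaPusher
host1x.StopDevice(3, Tegra::Host1x::ChannelType::NvDec);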

View File

@@ -3,9 +3,14 @@
#pragma once
#include <unordered_map>
#include <unordered_set>
#include <queue>
#include "common/common_types.h"
#include "common/address_space.h"
#include "video_core/cdma_pusher.h"
#include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/host1x/syncpoint_manager.h"
#include "video_core/memory_manager.h"
@@ -14,15 +19,128 @@ namespace Core {
class System;
} // namespace Core
namespace Tegra {
namespace FFmpeg {
class Frame;
} // namespace FFmpeg
namespace Host1x {
namespace Tegra::Host1x {
class Nvdec;
class FrameQueue {
public:
void Open(s32 fd) {
std::scoped_lock l{m_mutex};
m_presentation_order.insert({fd, {}});
m_decode_order.insert({fd, {}});
}
void Close(s32 fd) {
std::scoped_lock l{m_mutex};
m_presentation_order.erase(fd);
m_decode_order.erase(fd);
}
s32 VicFindNvdecFdFromOffset(u64 search_offset) {
std::scoped_lock l{m_mutex};
// Vic does not know which nvdec is producing frames for it, so search all the fds here for
// the given offset.
for (auto& map : m_presentation_order) {
for (auto& [offset, frame] : map.second) {
if (offset == search_offset) {
return map.first;
}
}
}
for (auto& map : m_decode_order) {
for (auto& [offset, frame] : map.second) {
if (offset == search_offset) {
return map.first;
}
}
}
return -1;
}
void PushPresentOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
std::scoped_lock l{m_mutex};
auto map = m_presentation_order.find(fd);
map->second.emplace_back(offset, std::move(frame));
}
void PushDecodeOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
std::scoped_lock l{m_mutex};
auto map = m_decode_order.find(fd);
map->second.insert_or_assign(offset, std::move(frame));
}
std::shared_ptr<FFmpeg::Frame> GetFrame(s32 fd, u64 offset) {
if (fd == -1) {
return {};
}
std::scoped_lock l{m_mutex};
auto present_map = m_presentation_order.find(fd);
if (present_map->second.size() > 0) {
return GetPresentOrderLocked(fd);
}
auto decode_map = m_decode_order.find(fd);
if (decode_map->second.size() > 0) {
return GetDecodeOrderLocked(fd, offset);
}
return {};
}
private:
std::shared_ptr<FFmpeg::Frame> GetPresentOrderLocked(s32 fd) {
auto map = m_presentation_order.find(fd);
if (map->second.size() == 0) {
return {};
}
auto frame = std::move(map->second.front().second);
map->second.pop_front();
return frame;
}
std::shared_ptr<FFmpeg::Frame> GetDecodeOrderLocked(s32 fd, u64 offset) {
auto map = m_decode_order.find(fd);
auto it = map->second.find(offset);
if (it == map->second.end()) {
return {};
}
return std::move(map->second.extract(it).mapped());
}
using FramePtr = std::shared_ptr<FFmpeg::Frame>;
std::mutex m_mutex{};
std::unordered_map<s32, std::deque<std::pair<u64, FramePtr>>> m_presentation_order;
std::unordered_map<s32, std::unordered_map<u64, FramePtr>> m_decode_order;
};
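FrameQueue decouples the nvdec producers from the VIC consumer: frames are queued either in presentation order (a per-fd deque) or in decode order (keyed by surface offset), and the VIC can recover the producing fd from an offset alone. A sketch of the intended flow; the fd, offset, and frame names are illustrative, the actual calls live in Nvdec and Vic:

frame_queue.Open(nvdec_fd);
frame_queue.PushPresentOrder(nvdec_fd, luma_offset, std::move(decoded_frame));
// ... later, the VIC looks up which nvdec produced the surface it was handed:
const s32 fd = frame_queue.VicFindNvdecFdFromOffset(luma_offset);
if (auto frame = frame_queue.GetFrame(fd, luma_offset)) {
    // convert/blit `frame` into the VIC output surface
}
frame_queue.Close(nvdec_fd);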
enum class ChannelType : u32 {
MsEnc = 0,
VIC = 1,
GPU = 2,
NvDec = 3,
Display = 4,
NvJpg = 5,
TSec = 6,
Max = 7,
};
class Host1x {
public:
explicit Host1x(Core::System& system);
~Host1x();
Core::System& System() {
return system;
}
SyncpointManager& GetSyncpointManager() {
return syncpoint_manager;
}
@@ -55,14 +173,25 @@ public:
return *allocator;
}
void StartDevice(s32 fd, ChannelType type, u32 syncpt);
void StopDevice(s32 fd, ChannelType type);
void PushEntries(s32 fd, ChCommandHeaderList&& entries) {
auto it = devices.find(fd);
if (it == devices.end()) {
return;
}
it->second->PushEntries(std::move(entries));
}
private:
Core::System& system;
SyncpointManager syncpoint_manager;
Tegra::MaxwellDeviceMemoryManager memory_manager;
Tegra::MemoryManager gmmu_manager;
std::unique_ptr<Common::FlatAllocator<u32, 0, 32>> allocator;
FrameQueue frame_queue;
std::unordered_map<s32, std::unique_ptr<CDmaPusher>> devices;
};
} // namespace Host1x
} // namespace Tegra
} // namespace Tegra::Host1x

View File

@@ -2,6 +2,12 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/polyfill_thread.h"
#include "common/settings.h"
#include "video_core/host1x/codecs/h264.h"
#include "video_core/host1x/codecs/vp8.h"
#include "video_core/host1x/codecs/vp9.h"
#include "video_core/host1x/host1x.h"
#include "video_core/host1x/nvdec.h"
@@ -10,37 +16,70 @@ namespace Tegra::Host1x {
#define NVDEC_REG_INDEX(field_name) \
(offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
Nvdec::Nvdec(Host1x& host1x_)
: host1x(host1x_), state{}, codec(std::make_unique<Codec>(host1x, state)) {}
Nvdec::Nvdec(Host1x& host1x_, s32 id_, u32 syncpt, FrameQueue& frame_queue_)
: CDmaPusher{host1x_, id_}, id{id_}, syncpoint{syncpt}, frame_queue{frame_queue_} {
LOG_INFO(HW_GPU, "Created nvdec {}", id);
frame_queue.Open(id);
}
Nvdec::~Nvdec() = default;
Nvdec::~Nvdec() {
LOG_INFO(HW_GPU, "Destroying nvdec {}", id);
frame_queue.Close(id);
}
void Nvdec::ProcessMethod(u32 method, u32 argument) {
state.reg_array[method] = static_cast<u64>(argument) << 8;
regs.reg_array[method] = argument;
switch (method) {
case NVDEC_REG_INDEX(set_codec_id):
codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));
CreateDecoder(static_cast<NvdecCommon::VideoCodec>(argument));
break;
case NVDEC_REG_INDEX(execute):
case NVDEC_REG_INDEX(execute): {
if (wait_needed) {
std::this_thread::sleep_for(std::chrono::milliseconds(32));
wait_needed = false;
}
Execute();
break;
} break;
}
}
std::unique_ptr<FFmpeg::Frame> Nvdec::GetFrame() {
return codec->GetCurrentFrame();
void Nvdec::CreateDecoder(NvdecCommon::VideoCodec codec) {
if (decoder.get()) {
return;
}
switch (codec) {
case NvdecCommon::VideoCodec::H264:
decoder = std::make_unique<Decoders::H264>(host1x, regs, id, frame_queue);
break;
case NvdecCommon::VideoCodec::VP8:
decoder = std::make_unique<Decoders::VP8>(host1x, regs, id, frame_queue);
break;
case NvdecCommon::VideoCodec::VP9:
decoder = std::make_unique<Decoders::VP9>(host1x, regs, id, frame_queue);
break;
default:
UNIMPLEMENTED_MSG("Codec {}", static_cast<u32>(codec));
return; // no decoder was created; bail out instead of dereferencing it in the log below
}
LOG_INFO(HW_GPU, "Created decoder {} for id {}", decoder->GetCurrentCodecName(), id);
}
void Nvdec::Execute() {
switch (codec->GetCurrentCodec()) {
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Off) [[unlikely]] {
// Signalling syncpts too fast can cause games to get stuck as they don't expect a <1ms
// execution time. Sleep for half of a 60 fps frame just in case.
std::this_thread::sleep_for(std::chrono::milliseconds(8));
return;
}
switch (decoder->GetCurrentCodec()) {
case NvdecCommon::VideoCodec::H264:
case NvdecCommon::VideoCodec::VP8:
case NvdecCommon::VideoCodec::VP9:
codec->Decode();
decoder->Decode();
break;
default:
UNIMPLEMENTED_MSG("Codec {}", codec->GetCurrentCodecName());
UNIMPLEMENTED_MSG("Codec {}", decoder->GetCurrentCodecName());
break;
}
}
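Putting the two paths above together: a decoder is created lazily by the set_codec_id method and a frame is decoded by the execute method. Sketched as raw method writes (the pushing side is assumed; NVDEC_REG_INDEX is the macro defined at the top of this file):

nvdec.ProcessMethod(NVDEC_REG_INDEX(set_codec_id),
                    static_cast<u32>(NvdecCommon::VideoCodec::H264));  // lazily creates the H264 decoder
nvdec.ProcessMethod(NVDEC_REG_INDEX(execute), 0);                      // decodes one frame, or only sleeps
                                                                       // when nvdec emulation is off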

View File

@@ -5,33 +5,47 @@
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/host1x/codecs/codec.h"
#include "video_core/cdma_pusher.h"
#include "video_core/host1x/codecs/decoder.h"
namespace Tegra {
namespace Host1x {
class Host1x;
class FrameQueue;
class Nvdec {
class Nvdec final : public CDmaPusher {
public:
explicit Nvdec(Host1x& host1x);
explicit Nvdec(Host1x& host1x, s32 id, u32 syncpt, FrameQueue& frame_queue_);
~Nvdec();
/// Writes the method into the state, invoking Execute() if encountered
void ProcessMethod(u32 method, u32 argument);
void ProcessMethod(u32 method, u32 arg) override;
/// Return most recently decoded frame
[[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetFrame();
u32 GetSyncpoint() const {
return syncpoint;
}
void SetWait() {
wait_needed = true;
}
private:
/// Create the decoder when the codec id is set
void CreateDecoder(NvdecCommon::VideoCodec codec);
/// Invoke codec to decode a frame
void Execute();
Host1x& host1x;
NvdecCommon::NvdecRegisters state;
std::unique_ptr<Codec> codec;
s32 id;
u32 syncpoint;
FrameQueue& frame_queue;
NvdecCommon::NvdecRegisters regs{};
std::unique_ptr<Decoder> decoder;
bool wait_needed{false};
};
} // namespace Host1x

View File

@@ -17,6 +17,17 @@ enum class VideoCodec : u64 {
VP9 = 0x9,
};
struct Offset {
constexpr u64 Address() const noexcept {
return offset << 8;
}
private:
u64 offset;
};
static_assert(std::is_trivial_v<Offset>, "Offset must be trivial");
static_assert(sizeof(Offset) == 0x8, "Offset has the wrong size!");
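The Offset wrapper changes where the 8-bit shift happens: previously ProcessMethod stored `argument << 8` into the register file, while the reworked code stores the raw method argument and applies the shift when the offset is read through Address(). A tiny sketch of the equivalence (the value is illustrative):

constexpr u64 DecodedAddress(u64 raw_register_value) {
    return raw_register_value << 8;   // what Offset::Address() returns
}
static_assert(DecodedAddress(0x1234) == 0x123400);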
// NVDEC should use a 32-bit address space, but is mapped to 64-bit,
// doubling the sizes here compensates for that.
struct NvdecRegisters {
@@ -38,29 +49,40 @@ struct NvdecRegisters {
BitField<17, 1, u64> all_intra_frame;
};
} control_params;
u64 picture_info_offset; ///< 0x0808
u64 frame_bitstream_offset; ///< 0x0810
u64 frame_number; ///< 0x0818
u64 h264_slice_data_offsets; ///< 0x0820
u64 h264_mv_dump_offset; ///< 0x0828
INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830
u64 frame_stats_offset; ///< 0x0848
u64 h264_last_surface_luma_offset; ///< 0x0850
u64 h264_last_surface_chroma_offset; ///< 0x0858
std::array<u64, 17> surface_luma_offset; ///< 0x0860
std::array<u64, 17> surface_chroma_offset; ///< 0x08E8
INSERT_PADDING_WORDS_NOINIT(68); ///< 0x0970
u64 vp8_prob_data_offset; ///< 0x0A80
u64 vp8_header_partition_buf_offset; ///< 0x0A88
INSERT_PADDING_WORDS_NOINIT(60); ///< 0x0A90
u64 vp9_entropy_probs_offset; ///< 0x0B80
u64 vp9_backward_updates_offset; ///< 0x0B88
u64 vp9_last_frame_segmap_offset; ///< 0x0B90
u64 vp9_curr_frame_segmap_offset; ///< 0x0B98
INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0
u64 vp9_last_frame_mvs_offset; ///< 0x0BA8
u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0
INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8
Offset picture_info_offset; ///< 0x0808
Offset frame_bitstream_offset; ///< 0x0810
u64 frame_number; ///< 0x0818
Offset h264_slice_data_offsets; ///< 0x0820
Offset h264_mv_dump_offset; ///< 0x0828
INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830
Offset frame_stats_offset; ///< 0x0848
Offset h264_last_surface_luma_offset; ///< 0x0850
Offset h264_last_surface_chroma_offset; ///< 0x0858
std::array<Offset, 17> surface_luma_offsets; ///< 0x0860
std::array<Offset, 17> surface_chroma_offsets; ///< 0x08E8
Offset pic_scratch_buf_offset; ///< 0x0970
Offset external_mvbuffer_offset; ///< 0x0978
INSERT_PADDING_WORDS_NOINIT(32); ///< 0x0980
Offset h264_mbhist_buffer_offset; ///< 0x0A00
INSERT_PADDING_WORDS_NOINIT(30); ///< 0x0A08
Offset vp8_prob_data_offset; ///< 0x0A80
Offset vp8_header_partition_buf_offset; ///< 0x0A88
INSERT_PADDING_WORDS_NOINIT(28); ///< 0x0A90
Offset hvec_scalist_list_offset; ///< 0x0B00
Offset hvec_tile_sizes_offset; ///< 0x0B08
Offset hvec_filter_buffer_offset; ///< 0x0B10
Offset hvec_sao_buffer_offset; ///< 0x0B18
Offset hvec_slice_info_buffer_offset; ///< 0x0B20
Offset hvec_slice_group_index_offset; ///< 0x0B28
INSERT_PADDING_WORDS_NOINIT(20); ///< 0x0B30
Offset vp9_prob_tab_buffer_offset; ///< 0x0B80
Offset vp9_ctx_counter_buffer_offset; ///< 0x0B88
Offset vp9_segment_read_buffer_offset; ///< 0x0B90
Offset vp9_segment_write_buffer_offset; ///< 0x0B98
Offset vp9_tile_size_buffer_offset; ///< 0x0BA0
Offset vp9_col_mvwrite_buffer_offset; ///< 0x0BA8
Offset vp9_col_mvread_buffer_offset; ///< 0x0BB0
Offset vp9_filter_buffer_offset; ///< 0x0BB8
};
std::array<u64, NUM_REGS> reg_array;
};
@@ -81,16 +103,16 @@ ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104);
ASSERT_REG_POSITION(frame_stats_offset, 0x109);
ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A);
ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B);
ASSERT_REG_POSITION(surface_luma_offset, 0x10C);
ASSERT_REG_POSITION(surface_chroma_offset, 0x11D);
ASSERT_REG_POSITION(surface_luma_offsets, 0x10C);
ASSERT_REG_POSITION(surface_chroma_offsets, 0x11D);
ASSERT_REG_POSITION(vp8_prob_data_offset, 0x150);
ASSERT_REG_POSITION(vp8_header_partition_buf_offset, 0x151);
ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170);
ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171);
ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172);
ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173);
ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175);
ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
ASSERT_REG_POSITION(vp9_prob_tab_buffer_offset, 0x170);
ASSERT_REG_POSITION(vp9_ctx_counter_buffer_offset, 0x171);
ASSERT_REG_POSITION(vp9_segment_read_buffer_offset, 0x172);
ASSERT_REG_POSITION(vp9_segment_write_buffer_offset, 0x173);
ASSERT_REG_POSITION(vp9_col_mvwrite_buffer_offset, 0x175);
ASSERT_REG_POSITION(vp9_col_mvread_buffer_offset, 0x176);
#undef ASSERT_REG_POSITION

View File

@@ -1,50 +0,0 @@
// SPDX-FileCopyrightText: Ryujinx Team and Contributors
// SPDX-License-Identifier: MIT
#include <algorithm>
#include "sync_manager.h"
#include "video_core/host1x/host1x.h"
#include "video_core/host1x/syncpoint_manager.h"
namespace Tegra {
namespace Host1x {
SyncptIncrManager::SyncptIncrManager(Host1x& host1x_) : host1x(host1x_) {}
SyncptIncrManager::~SyncptIncrManager() = default;
void SyncptIncrManager::Increment(u32 id) {
increments.emplace_back(0, 0, id, true);
IncrementAllDone();
}
u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
const u32 handle = current_id++;
increments.emplace_back(handle, class_id, id);
return handle;
}
void SyncptIncrManager::SignalDone(u32 handle) {
const auto done_incr =
std::find_if(increments.begin(), increments.end(),
[handle](const SyncptIncr& incr) { return incr.id == handle; });
if (done_incr != increments.cend()) {
done_incr->complete = true;
}
IncrementAllDone();
}
void SyncptIncrManager::IncrementAllDone() {
std::size_t done_count = 0;
for (; done_count < increments.size(); ++done_count) {
if (!increments[done_count].complete) {
break;
}
auto& syncpoint_manager = host1x.GetSyncpointManager();
syncpoint_manager.IncrementGuest(increments[done_count].syncpt_id);
syncpoint_manager.IncrementHost(increments[done_count].syncpt_id);
}
increments.erase(increments.begin(), increments.begin() + done_count);
}
} // namespace Host1x
} // namespace Tegra

View File

@@ -1,53 +0,0 @@
// SPDX-FileCopyrightText: Ryujinx Team and Contributors
// SPDX-License-Identifier: MIT
#pragma once
#include <mutex>
#include <vector>
#include "common/common_types.h"
namespace Tegra {
namespace Host1x {
class Host1x;
struct SyncptIncr {
u32 id;
u32 class_id;
u32 syncpt_id;
bool complete;
SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false)
: id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
};
class SyncptIncrManager {
public:
explicit SyncptIncrManager(Host1x& host1x);
~SyncptIncrManager();
/// Add syncpoint id and increment all
void Increment(u32 id);
/// Returns a handle to increment later
u32 IncrementWhenDone(u32 class_id, u32 id);
/// IncrementAllDone, including handle
void SignalDone(u32 handle);
/// Increment all sequential pending increments that are already done.
void IncrementAllDone();
private:
std::vector<SyncptIncr> increments;
std::mutex increment_lock;
u32 current_id{};
Host1x& host1x;
};
} // namespace Host1x
} // namespace Tegra

View File

@@ -18,7 +18,7 @@ SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
return {};
}
std::unique_lock lk(guard);
std::scoped_lock lk(guard);
if (syncpoint.load(std::memory_order_relaxed) >= expected_value) {
action();
return {};
@@ -35,7 +35,7 @@ SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage,
const ActionHandle& handle) {
std::unique_lock lk(guard);
std::scoped_lock lk(guard);
// We want to ensure the iterator still exists prior to erasing it
// Otherwise, if an invalid iterator was passed in then it could lead to UB
@@ -78,7 +78,7 @@ void SyncpointManager::Increment(std::atomic<u32>& syncpoint, std::condition_var
std::list<RegisteredAction>& action_storage) {
auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1};
std::unique_lock lk(guard);
std::scoped_lock lk(guard);
auto it = action_storage.begin();
while (it != action_storage.end()) {
if (it->expected_value > new_value) {

File diff suppressed because it is too large

View File

@@ -3,65 +3,646 @@
#pragma once
#include <condition_variable>
#include <functional>
#include <memory>
#include <mutex>
#include <thread>
#include "common/common_types.h"
#include "common/scratch_buffer.h"
#include "video_core/cdma_pusher.h"
struct SwsContext;
namespace Tegra {
namespace Host1x {
namespace Tegra::Host1x {
class Host1x;
class Nvdec;
union VicConfig;
class Vic {
struct Pixel {
u16 r;
u16 g;
u16 b;
u16 a;
};
// One underscore represents separate pixels.
// Double underscore represents separate planes.
// _N represents chroma subsampling, not a separate pixel.
enum class VideoPixelFormat : u32 {
A8 = 0,
L8 = 1,
A4L4 = 2,
L4A4 = 3,
R8 = 4,
A8L8 = 5,
L8A8 = 6,
R8G8 = 7,
G8R8 = 8,
B5G6R5 = 9,
R5G6B5 = 10,
B6G5R5 = 11,
R5G5B6 = 12,
A1B5G5R5 = 13,
A1R5G5B5 = 14,
B5G5R5A1 = 15,
R5G5B5A1 = 16,
A5B5G5R1 = 17,
A5R1G5B5 = 18,
B5G5R1A5 = 19,
R1G5B5A5 = 20,
X1B5G5R5 = 21,
X1R5G5B5 = 22,
B5G5R5X1 = 23,
R5G5B5X1 = 24,
A4B4G5R4 = 25,
A4R4G4B4 = 26,
B4G4R4A4 = 27,
R4G4B4A4 = 28,
B8G8R8 = 29,
R8G8B8 = 30,
A8B8G8R8 = 31,
A8R8G8B8 = 32,
B8G8R8A8 = 33,
R8G8B8A8 = 34,
X8B8G8R8 = 35,
X8R8G8B8 = 36,
B8G8R8X8 = 37,
R8G8B8X8 = 38,
A8B10G10R10 = 39,
A2R10G10B10 = 40,
B10G10R10A2 = 41,
R10G10B10A2 = 42,
A4P4 = 43,
P4A4 = 44,
P8A8 = 45,
A8P8 = 46,
P8 = 47,
P1 = 48,
U8V8 = 49,
V8U8 = 50,
A8Y8U8V8 = 51,
V8U8Y8A8 = 52,
Y8U8V8 = 53,
Y8V8U8 = 54,
U8V8Y8 = 55,
V8U8Y8 = 56,
Y8U8_Y8V8 = 57,
Y8V8_Y8U8 = 58,
U8Y8_V8Y8 = 59,
V8Y8_U8Y8 = 60,
Y8__U8V8_N444 = 61,
Y8__V8U8_N444 = 62,
Y8__U8V8_N422 = 63,
Y8__V8U8_N422 = 64,
Y8__U8V8_N422R = 65,
Y8__V8U8_N422R = 66,
Y8__U8V8_N420 = 67,
Y8__V8U8_N420 = 68,
Y8__U8__V8_N444 = 69,
Y8__U8__V8_N422 = 70,
Y8__U8__V8_N422R = 71,
Y8__U8__V8_N420 = 72,
U8 = 73,
V8 = 74,
};
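As the comment above notes, the _N4xx suffixes describe chroma subsampling rather than extra pixels; for example Y8__V8U8_N420 is a luma plane plus an interleaved V/U plane at 4:2:0 (NV21-like), while Y8__U8__V8_N420 is three separate planes (I420-like). A small illustration of what 4:2:0 implies for plane dimensions; ChromaDimensions420 is a hypothetical helper, not part of this change:

#include <utility>

constexpr std::pair<u32, u32> ChromaDimensions420(u32 luma_width, u32 luma_height) {
    return {(luma_width + 1) / 2, (luma_height + 1) / 2};   // halved in both dimensions
}
static_assert(ChromaDimensions420(1280, 720) == std::pair<u32, u32>(640, 360));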
struct Offset {
constexpr u32 Address() const noexcept {
return offset << 8;
}
private:
u32 offset;
};
static_assert(std::is_trivial_v<Offset>, "Offset must be trivial");
static_assert(sizeof(Offset) == 0x4, "Offset has the wrong size!");
struct PlaneOffsets {
Offset luma;
Offset chroma_u;
Offset chroma_v;
};
static_assert(sizeof(PlaneOffsets) == 0xC, "PlaneOffsets has the wrong size!");
enum SurfaceIndex : u32 {
Current = 0,
Previous = 1,
Next = 2,
NextNoiseReduced = 3,
CurrentMotion = 4,
PreviousMotion = 5,
PreviousPreviousMotion = 6,
CombinedMotion = 7,
};
enum class DXVAHD_ALPHA_FILL_MODE : u32 {
OPAQUE = 0,
BACKGROUND = 1,
DESTINATION = 2,
SOURCE_STREAM = 3,
COMPOSITED = 4,
SOURCE_ALPHA = 5,
};
enum class DXVAHD_FRAME_FORMAT : u64 {
PROGRESSIVE = 0,
INTERLACED_TOP_FIELD_FIRST = 1,
INTERLACED_BOTTOM_FIELD_FIRST = 2,
TOP_FIELD = 3,
BOTTOM_FIELD = 4,
SUBPIC_PROGRESSIVE = 5,
SUBPIC_INTERLACED_TOP_FIELD_FIRST = 6,
SUBPIC_INTERLACED_BOTTOM_FIELD_FIRST = 7,
SUBPIC_TOP_FIELD = 8,
SUBPIC_BOTTOM_FIELD = 9,
TOP_FIELD_CHROMA_BOTTOM = 10,
BOTTOM_FIELD_CHROMA_TOP = 11,
SUBPIC_TOP_FIELD_CHROMA_BOTTOM = 12,
SUBPIC_BOTTOM_FIELD_CHROMA_TOP = 13,
};
enum class DXVAHD_DEINTERLACE_MODE_PRIVATE : u64 {
WEAVE = 0,
BOB_FIELD = 1,
BOB = 2,
NEWBOB = 3,
DISI1 = 4,
WEAVE_LUMA_BOB_FIELD_CHROMA = 5,
MAX = 0xF,
};
enum class BLK_KIND {
PITCH = 0,
GENERIC_16Bx2 = 1,
// These are unsupported in the vic
BL_NAIVE = 2,
BL_KEPLER_XBAR_RAW = 3,
VP2_TILED = 15,
};
enum class BLEND_SRCFACTC : u32 {
K1 = 0,
K1_TIMES_DST = 1,
NEG_K1_TIMES_DST = 2,
K1_TIMES_SRC = 3,
ZERO = 4,
};
enum class BLEND_DSTFACTC : u32 {
K1 = 0,
K2 = 1,
K1_TIMES_DST = 2,
NEG_K1_TIMES_DST = 3,
NEG_K1_TIMES_SRC = 4,
ZERO = 5,
ONE = 6,
};
enum class BLEND_SRCFACTA : u32 {
K1 = 0,
K2 = 1,
NEG_K1_TIMES_DST = 2,
ZERO = 3,
MAX = 7,
};
enum class BLEND_DSTFACTA : u32 {
K2 = 0,
NEG_K1_TIMES_SRC = 1,
ZERO = 2,
ONE = 3,
MAX = 7,
};
struct PipeConfig {
union {
BitField<0, 11, u32> downsample_horiz;
BitField<11, 5, u32> reserved0;
BitField<16, 11, u32> downsample_vert;
BitField<27, 5, u32> reserved1;
};
u32 reserved2;
u32 reserved3;
u32 reserved4;
};
static_assert(sizeof(PipeConfig) == 0x10, "PipeConfig has the wrong size!");
struct OutputConfig {
union {
BitField<0, 3, DXVAHD_ALPHA_FILL_MODE> alpha_fill_mode;
BitField<3, 3, u64> alpha_fill_slot;
BitField<6, 10, u64> background_a;
BitField<16, 10, u64> background_r;
BitField<26, 10, u64> background_g;
BitField<36, 10, u64> background_b;
BitField<46, 2, u64> regamma_mode;
BitField<48, 1, u64> output_flip_x;
BitField<49, 1, u64> output_flip_y;
BitField<50, 1, u64> output_transpose;
BitField<51, 1, u64> reserved1;
BitField<52, 12, u64> reserved2;
};
union {
BitField<0, 14, u32> target_rect_left;
BitField<14, 2, u32> reserved3;
BitField<16, 14, u32> target_rect_right;
BitField<30, 2, u32> reserved4;
};
union {
BitField<0, 14, u32> target_rect_top;
BitField<14, 2, u32> reserved5;
BitField<16, 14, u32> target_rect_bottom;
BitField<30, 2, u32> reserved6;
};
};
static_assert(sizeof(OutputConfig) == 0x10, "OutputConfig has the wrong size!");
struct OutputSurfaceConfig {
union {
BitField<0, 7, VideoPixelFormat> out_pixel_format;
BitField<7, 2, u32> out_chroma_loc_horiz;
BitField<9, 2, u32> out_chroma_loc_vert;
BitField<11, 4, BLK_KIND> out_block_kind;
BitField<15, 4, u32> out_block_height; // in gobs, log2
BitField<19, 3, u32> reserved0;
BitField<22, 10, u32> reserved1;
};
union {
BitField<0, 14, u32> out_surface_width; // - 1
BitField<14, 14, u32> out_surface_height; // - 1
BitField<28, 4, u32> reserved2;
};
union {
BitField<0, 14, u32> out_luma_width; // - 1
BitField<14, 14, u32> out_luma_height; // - 1
BitField<28, 4, u32> reserved3;
};
union {
BitField<0, 14, u32> out_chroma_width; // - 1
BitField<14, 14, u32> out_chroma_height; // - 1
BitField<28, 4, u32> reserved4;
};
};
static_assert(sizeof(OutputSurfaceConfig) == 0x10, "OutputSurfaceConfig has the wrong size!");
struct MatrixStruct {
union {
BitField<0, 20, s64> matrix_coeff00; // (0,0) of 4x3 conversion matrix
BitField<20, 20, s64> matrix_coeff10; // (1,0) of 4x3 conversion matrix
BitField<40, 20, s64> matrix_coeff20; // (2,0) of 4x3 conversion matrix
BitField<60, 4, u64> matrix_r_shift;
};
union {
BitField<0, 20, s64> matrix_coeff01; // (0,1) of 4x3 conversion matrix
BitField<20, 20, s64> matrix_coeff11; // (1,1) of 4x3 conversion matrix
BitField<40, 20, s64> matrix_coeff21; // (2,1) of 4x3 conversion matrix
BitField<60, 3, u64> reserved0;
BitField<63, 1, u64> matrix_enable;
};
union {
BitField<0, 20, s64> matrix_coeff02; // (0,2) of 4x3 conversion matrix
BitField<20, 20, s64> matrix_coeff12; // (1,2) of 4x3 conversion matrix
BitField<40, 20, s64> matrix_coeff22; // (2,2) of 4x3 conversion matrix
BitField<60, 4, u64> reserved1;
};
union {
BitField<0, 20, s64> matrix_coeff03; // (0,3) of 4x3 conversion matrix
BitField<20, 20, s64> matrix_coeff13; // (1,3) of 4x3 conversion matrix
BitField<40, 20, s64> matrix_coeff23; // (2,3) of 4x3 conversion matrix
BitField<60, 4, u64> reserved2;
};
};
static_assert(sizeof(MatrixStruct) == 0x20, "MatrixStruct has the wrong size!");
struct ClearRectStruct {
union {
BitField<0, 14, u32> clear_rect0_left;
BitField<14, 2, u32> reserved0;
BitField<16, 14, u32> clear_rect0_right;
BitField<30, 2, u32> reserved1;
};
union {
BitField<0, 14, u32> clear_rect0_top;
BitField<14, 2, u32> reserved2;
BitField<16, 14, u32> clear_rect0_bottom;
BitField<30, 2, u32> reserved3;
};
union {
BitField<0, 14, u32> clear_rect1_left;
BitField<14, 2, u32> reserved4;
BitField<16, 14, u32> clear_rect1_right;
BitField<30, 2, u32> reserved5;
};
union {
BitField<0, 14, u32> clear_rect1_top;
BitField<14, 2, u32> reserved6;
BitField<16, 14, u32> clear_rect1_bottom;
BitField<30, 2, u32> reserved7;
};
};
static_assert(sizeof(ClearRectStruct) == 0x10, "ClearRectStruct has the wrong size!");
struct SlotConfig {
union {
BitField<0, 1, u64> slot_enable;
BitField<1, 1, u64> denoise;
BitField<2, 1, u64> advanced_denoise;
BitField<3, 1, u64> cadence_detect;
BitField<4, 1, u64> motion_map;
BitField<5, 1, u64> motion_map_capture;
BitField<6, 1, u64> is_even;
BitField<7, 1, u64> chroma_even;
// fetch control struct
BitField<8, 1, u64> current_field_enable;
BitField<9, 1, u64> prev_field_enable;
BitField<10, 1, u64> next_field_enable;
BitField<11, 1, u64> next_nr_field_enable; // noise reduction
BitField<12, 1, u64> current_motion_field_enable;
BitField<13, 1, u64> prev_motion_field_enable;
BitField<14, 1, u64> prev_prev_motion_field_enable;
BitField<15, 1, u64> combined_motion_field_enable;
BitField<16, 4, DXVAHD_FRAME_FORMAT> frame_format;
BitField<20, 2, u64> filter_length_y; // 0: 1-tap, 1: 2-tap, 2: 5-tap, 3: 10-tap
BitField<22, 2, u64> filter_length_x;
BitField<24, 12, u64> panoramic;
BitField<36, 22, u64> reserved1;
BitField<58, 6, u64> detail_filter_clamp;
};
union {
BitField<0, 10, u64> filter_noise;
BitField<10, 10, u64> filter_detail;
BitField<20, 10, u64> chroma_noise;
BitField<30, 10, u64> chroma_detail;
BitField<40, 4, DXVAHD_DEINTERLACE_MODE_PRIVATE> deinterlace_mode;
BitField<44, 3, u64> motion_accumulation_weight;
BitField<47, 11, u64> noise_iir;
BitField<58, 4, u64> light_level;
BitField<62, 2, u64> reserved4;
};
union {
BitField<0, 10, u64> soft_clamp_low;
BitField<10, 10, u64> soft_clamp_high;
BitField<20, 3, u64> reserved5;
BitField<23, 9, u64> reserved6;
BitField<32, 10, u64> planar_alpha;
BitField<42, 1, u64> constant_alpha;
BitField<43, 3, u64> stereo_interleave;
BitField<46, 1, u64> clip_enabled;
BitField<47, 8, u64> clear_rect_mask;
BitField<55, 2, u64> degamma_mode;
BitField<57, 1, u64> reserved7;
BitField<58, 1, u64> decompress_enable;
BitField<59, 5, u64> reserved9;
};
union {
BitField<0, 8, u64> decompress_ctb_count;
BitField<8, 32, u64> decompress_zbc_count;
BitField<40, 24, u64> reserved12;
};
union {
BitField<0, 30, u64> source_rect_left;
BitField<30, 2, u64> reserved14;
BitField<32, 30, u64> source_rect_right;
BitField<62, 2, u64> reserved15;
};
union {
BitField<0, 30, u64> source_rect_top;
BitField<30, 2, u64> reserved16;
BitField<32, 30, u64> source_rect_bottom;
BitField<62, 2, u64> reserved17;
};
union {
BitField<0, 14, u64> dest_rect_left;
BitField<14, 2, u64> reserved18;
BitField<16, 14, u64> dest_rect_right;
BitField<30, 2, u64> reserved19;
BitField<32, 14, u64> dest_rect_top;
BitField<46, 2, u64> reserved20;
BitField<48, 14, u64> dest_rect_bottom;
BitField<62, 2, u64> reserved21;
};
u32 reserved22;
u32 reserved23;
};
static_assert(sizeof(SlotConfig) == 0x40, "SlotConfig has the wrong size!");
struct SlotSurfaceConfig {
union {
BitField<0, 7, VideoPixelFormat> slot_pixel_format;
BitField<7, 2, u32> slot_chroma_loc_horiz;
BitField<9, 2, u32> slot_chroma_loc_vert;
BitField<11, 4, u32> slot_block_kind;
BitField<15, 4, u32> slot_block_height;
BitField<19, 3, u32> slot_cache_width;
BitField<22, 10, u32> reserved0;
};
union {
BitField<0, 14, u32> slot_surface_width; // - 1
BitField<14, 14, u32> slot_surface_height; // - 1
BitField<28, 4, u32> reserved1;
};
union {
BitField<0, 14, u32> slot_luma_width; // padded, - 1
BitField<14, 14, u32> slot_luma_height; // padded, - 1
BitField<28, 4, u32> reserved2;
};
union {
BitField<0, 14, u32> slot_chroma_width; // padded, - 1
BitField<14, 14, u32> slot_chroma_height; // padded, - 1
BitField<28, 4, u32> reserved3;
};
};
static_assert(sizeof(SlotSurfaceConfig) == 0x10, "SlotSurfaceConfig has the wrong size!");
struct LumaKeyStruct {
union {
BitField<0, 20, u64> luma_coeff0; // (0) of 4x1 conversion matrix, S12.8 format
BitField<20, 20, u64> luma_coeff1; // (1) of 4x1 conversion matrix, S12.8 format
BitField<40, 20, u64> luma_coeff2; // (2) of 4x1 conversion matrix, S12.8 format
BitField<60, 4, u64> luma_r_shift;
};
union {
BitField<0, 20, u64> luma_coeff3; // (3) of 4x1 conversion matrix, S12.8 format
BitField<20, 10, u64> luma_key_lower;
BitField<30, 10, u64> luma_key_upper;
BitField<40, 1, u64> luma_key_enabled;
BitField<41, 2, u64> reserved0;
BitField<43, 21, u64> reserved1;
};
};
static_assert(sizeof(LumaKeyStruct) == 0x10, "LumaKeyStruct has the wrong size!");
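// The luma_coeffN fields are 20-bit signed fixed-point values ("S12.8", taken
// here to mean two's-complement with 8 fractional bits). A minimal decode
// sketch; the helper name is illustrative and not part of the original header:
inline f32 DecodeS12_8(u64 raw) {
    const s32 value = static_cast<s32>(static_cast<u32>(raw) << 12) >> 12; // sign-extend 20 bits
    return static_cast<f32>(value) / 256.0f;                               // 8 fractional bits
}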
struct BlendingSlotStruct {
union {
BitField<0, 10, u32> alpha_k1;
BitField<10, 6, u32> reserved0;
BitField<16, 10, u32> alpha_k2;
BitField<26, 6, u32> reserved1;
};
union {
BitField<0, 3, BLEND_SRCFACTC> src_factor_color_match_select;
BitField<3, 1, u32> reserved2;
BitField<4, 3, BLEND_DSTFACTC> dst_factor_color_match_select;
BitField<7, 1, u32> reserved3;
BitField<8, 3, BLEND_SRCFACTA> src_factor_a_match_select;
BitField<11, 1, u32> reserved4;
BitField<12, 3, BLEND_DSTFACTA> dst_factor_a_match_select;
BitField<15, 1, u32> reserved5;
BitField<16, 4, u32> reserved6;
BitField<20, 4, u32> reserved7;
BitField<24, 4, u32> reserved8;
BitField<28, 4, u32> reserved9;
};
union {
BitField<0, 2, u32> reserved10;
BitField<2, 10, u32> override_r;
BitField<12, 10, u32> override_g;
BitField<22, 10, u32> override_b;
};
union {
BitField<0, 10, u32> override_a;
BitField<10, 2, u32> reserved11;
BitField<12, 1, u32> use_override_r;
BitField<13, 1, u32> use_override_g;
BitField<14, 1, u32> use_override_b;
BitField<15, 1, u32> use_override_a;
BitField<16, 1, u32> mask_r;
BitField<17, 1, u32> mask_g;
BitField<18, 1, u32> mask_b;
BitField<19, 1, u32> mask_a;
BitField<20, 12, u32> reserved12;
};
};
static_assert(sizeof(BlendingSlotStruct) == 0x10, "BlendingSlotStruct has the wrong size!");
struct SlotStruct {
SlotConfig config;
SlotSurfaceConfig surface_config;
LumaKeyStruct luma_key;
MatrixStruct color_matrix;
MatrixStruct gamut_matrix;
BlendingSlotStruct blending;
};
static_assert(sizeof(SlotStruct) == 0xB0, "SlotStruct has the wrong size!");
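// The 0xB0 total, together with the ConfigStruct offsets below, implies
// sizeof(MatrixStruct) == 0x20: 0x40 (config) + 0x10 (surface_config)
// + 0x10 (luma_key) + 2 * 0x20 (color_matrix, gamut_matrix) + 0x10 (blending)
// = 0xB0.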
struct ConfigStruct {
PipeConfig pipe_config;
OutputConfig output_config;
OutputSurfaceConfig output_surface_config;
MatrixStruct out_color_matrix;
std::array<ClearRectStruct, 4> clear_rects;
std::array<SlotStruct, 8> slot_structs;
};
static_assert(offsetof(ConfigStruct, pipe_config) == 0x0, "pipe_config is in the wrong place!");
static_assert(offsetof(ConfigStruct, output_config) == 0x10,
"output_config is in the wrong place!");
static_assert(offsetof(ConfigStruct, output_surface_config) == 0x20,
"output_surface_config is in the wrong place!");
static_assert(offsetof(ConfigStruct, out_color_matrix) == 0x30,
"out_color_matrix is in the wrong place!");
static_assert(offsetof(ConfigStruct, clear_rects) == 0x50, "clear_rects is in the wrong place!");
static_assert(offsetof(ConfigStruct, slot_structs) == 0x90, "slot_structs is in the wrong place!");
static_assert(sizeof(ConfigStruct) == 0x610, "ConfigStruct has the wrong size!");
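// Layout arithmetic behind these asserts: PipeConfig, OutputConfig and
// OutputSurfaceConfig are 0x10 bytes each (0x00-0x2F), MatrixStruct is 0x20
// (0x30-0x4F), the four ClearRectStructs take 4 * 0x10 = 0x40 (0x50-0x8F),
// and the eight SlotStructs add 8 * 0xB0 = 0x580, for a total of 0x610.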
struct VicRegisters {
static constexpr std::size_t NUM_REGS = 0x446;
union {
struct {
INSERT_PADDING_WORDS_NOINIT(0xC0);
u32 execute;
INSERT_PADDING_WORDS_NOINIT(0x3F);
std::array<std::array<PlaneOffsets, 8>, 8> surfaces;
u32 picture_index;
u32 control_params;
Offset config_struct_offset;
Offset filter_struct_offset;
Offset palette_offset;
Offset hist_offset;
u32 context_id;
u32 fce_ucode_size;
PlaneOffsets output_surface;
Offset fce_ucode_offset;
INSERT_PADDING_WORDS_NOINIT(0x4);
std::array<u32, 8> slot_context_ids;
std::array<Offset, 8> comp_tag_buffer_offsets;
std::array<Offset, 8> history_buffer_offset;
INSERT_PADDING_WORDS_NOINIT(0x25D);
u32 pm_trigger_end;
};
std::array<u32, NUM_REGS> reg_array;
};
};
static_assert(offsetof(VicRegisters, execute) == 0x300, "execute is in the wrong place!");
static_assert(offsetof(VicRegisters, surfaces) == 0x400, "surfaces is in the wrong place!");
static_assert(offsetof(VicRegisters, picture_index) == 0x700,
"picture_index is in the wrong place!");
static_assert(offsetof(VicRegisters, control_params) == 0x704,
"control_params is in the wrong place!");
static_assert(offsetof(VicRegisters, config_struct_offset) == 0x708,
"config_struct_offset is in the wrong place!");
static_assert(offsetof(VicRegisters, output_surface) == 0x720,
"output_surface is in the wrong place!");
static_assert(offsetof(VicRegisters, slot_context_ids) == 0x740,
"slot_context_ids is in the wrong place!");
static_assert(offsetof(VicRegisters, history_buffer_offset) == 0x780,
"history_buffer_offset is in the wrong place!");
static_assert(offsetof(VicRegisters, pm_trigger_end) == 0x1114,
"pm_trigger_end is in the wrong place!");
static_assert(sizeof(VicRegisters) == 0x1118, "VicRegisters has the wrong size!");
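// Register map arithmetic: the 0xC0 padding words put execute at byte offset
// 0xC0 * 4 = 0x300; execute plus the 0x3F padding words that follow (0x40
// words in total) place the surfaces array at 0x400; the 64 PlaneOffsets
// entries spanning 0x400-0x700 are therefore 12 bytes each (three 32-bit
// offsets); and NUM_REGS * sizeof(u32) = 0x446 * 4 = 0x1118 matches
// sizeof(VicRegisters).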
class Vic final : public CDmaPusher {
public:
enum class Method : u32 {
Execute = 0xc0,
SetControlParams = 0x1c1,
SetConfigStructOffset = 0x1c2,
SetOutputSurfaceLumaOffset = 0x1c8,
SetOutputSurfaceChromaOffset = 0x1c9,
SetOutputSurfaceChromaUnusedOffset = 0x1ca
Execute = offsetof(VicRegisters, execute),
SetControlParams = offsetof(VicRegisters, control_params),
SetConfigStructOffset = offsetof(VicRegisters, config_struct_offset),
SetOutputSurfaceLumaOffset = offsetof(VicRegisters, output_surface.luma),
SetOutputSurfaceChromaOffset = offsetof(VicRegisters, output_surface.chroma_u),
SetOutputSurfaceChromaUnusedOffset = offsetof(VicRegisters, output_surface.chroma_v)
};
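// The two sets of enumerator values above refer to the same registers: the
// hardcoded constants appear to be u32 word indices into reg_array, while the
// offsetof() forms are byte offsets, so each pair differs by a factor of
// sizeof(u32) (0xC0 * 4 == 0x300 for execute, 0x1C1 * 4 == 0x704 for
// control_params, 0x1C2 * 4 == 0x708 for config_struct_offset, and
// 0x1C8-0x1CA * 4 == 0x720-0x728 for the three output surface plane offsets).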
explicit Vic(Host1x& host1x, std::shared_ptr<Nvdec> nvdec_processor);
explicit Vic(Host1x& host1x, s32 id, u32 syncpt, FrameQueue& frame_queue);
~Vic();
/// Write to the device state.
void ProcessMethod(Method method, u32 argument);
void ProcessMethod(u32 method, u32 arg) override;
private:
void Execute();
void WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config);
void Blend(const ConfigStruct& config, const SlotStruct& slot);
void WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config);
template <bool Planar, bool Interlaced = false>
void ReadProgressiveY8__V8U8_N420(const SlotStruct& slot, std::span<const PlaneOffsets> offsets,
std::shared_ptr<const FFmpeg::Frame> frame);
template <bool Planar, bool TopField>
void ReadInterlacedY8__V8U8_N420(const SlotStruct& slot, std::span<const PlaneOffsets> offsets,
std::shared_ptr<const FFmpeg::Frame> frame);
Host1x& host1x;
std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
template <bool Planar>
void ReadY8__V8U8_N420(const SlotStruct& slot, std::span<const PlaneOffsets> offsets,
std::shared_ptr<const FFmpeg::Frame> frame);
/// Avoid reallocation of the following buffers every frame, as their
/// size does not change during a stream
using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
AVMallocPtr converted_frame_buffer;
Common::ScratchBuffer<u8> luma_buffer;
Common::ScratchBuffer<u8> chroma_buffer;
void WriteY8__V8U8_N420(const OutputSurfaceConfig& output_surface_config);
GPUVAddr config_struct_address{};
GPUVAddr output_surface_luma_address{};
GPUVAddr output_surface_chroma_address{};
template <VideoPixelFormat Format>
void WriteABGR(const OutputSurfaceConfig& output_surface_config);
SwsContext* scaler_ctx{};
s32 scaler_width{};
s32 scaler_height{};
s32 id;
s32 nvdec_id{-1};
u32 syncpoint;
VicRegisters regs{};
FrameQueue& frame_queue;
const bool has_sse41{false};
Common::ScratchBuffer<Pixel> output_surface;
Common::ScratchBuffer<Pixel> slot_surface;
Common::ScratchBuffer<u8> luma_scratch;
Common::ScratchBuffer<u8> chroma_scratch;
Common::ScratchBuffer<u8> swizzle_scratch;
};
} // namespace Host1x
} // namespace Tegra
} // namespace Tegra::Host1x

View File

@@ -37,6 +37,7 @@ layout(set=0,binding=0) uniform sampler2D InputTexture;
#define A_GPU 1
#define A_GLSL 1
#define FSR_RCAS_PASSTHROUGH_ALPHA 1
#ifndef YUZU_USE_FP16
#include "ffx_a.h"
@@ -71,9 +72,7 @@ layout(set=0,binding=0) uniform sampler2D InputTexture;
#include "ffx_fsr1.h"
#if USE_RCAS
layout(location = 0) in vec2 frag_texcoord;
#endif
layout (location = 0) in vec2 frag_texcoord;
layout (location = 0) out vec4 frag_color;
void CurrFilter(AU2 pos) {
@@ -81,22 +80,22 @@ void CurrFilter(AU2 pos) {
#ifndef YUZU_USE_FP16
AF3 c;
FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
frag_color = AF4(c, 1.0);
frag_color = AF4(c, texture(InputTexture, frag_texcoord).a);
#else
AH3 c;
FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
frag_color = AH4(c, 1.0);
frag_color = AH4(c, texture(InputTexture, frag_texcoord).a);
#endif
#endif
#if USE_RCAS
#ifndef YUZU_USE_FP16
AF3 c;
FsrRcasF(c.r, c.g, c.b, pos, Const0);
frag_color = AF4(c, 1.0);
AF4 c;
FsrRcasF(c.r, c.g, c.b, c.a, pos, Const0);
frag_color = c;
#else
AH3 c;
FsrRcasH(c.r, c.g, c.b, pos, Const0);
frag_color = AH4(c, 1.0);
AH4 c;
FsrRcasH(c.r, c.g, c.b, c.a, pos, Const0);
frag_color = c;
#endif
#endif
}

View File

@@ -71,5 +71,5 @@ vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) {
}
void main() {
frag_color = vec4(FxaaPixelShader(posPos, input_texture), 1.0);
frag_color = vec4(FxaaPixelShader(posPos, input_texture), texture(input_texture, posPos.xy).a);
}

View File

@@ -31,6 +31,7 @@ layout (location = 0) uniform uvec4 constants[4];
#define A_GPU 1
#define A_GLSL 1
#define FSR_RCAS_PASSTHROUGH_ALPHA 1
#ifdef YUZU_USE_FP16
#define A_HALF
@@ -67,9 +68,7 @@ layout (location = 0) uniform uvec4 constants[4];
#include "ffx_fsr1.h"
#if USE_RCAS
layout(location = 0) in vec2 frag_texcoord;
#endif
layout (location = 0) in vec2 frag_texcoord;
layout (location = 0) out vec4 frag_color;
void CurrFilter(AU2 pos)
@@ -78,22 +77,22 @@ void CurrFilter(AU2 pos)
#ifndef YUZU_USE_FP16
AF3 c;
FsrEasuF(c, pos, constants[0], constants[1], constants[2], constants[3]);
frag_color = AF4(c, 1.0);
frag_color = AF4(c, texture(InputTexture, frag_texcoord).a);
#else
AH3 c;
FsrEasuH(c, pos, constants[0], constants[1], constants[2], constants[3]);
frag_color = AH4(c, 1.0);
frag_color = AH4(c, texture(InputTexture, frag_texcoord).a);
#endif
#endif
#if USE_RCAS
#ifndef YUZU_USE_FP16
AF3 c;
FsrRcasF(c.r, c.g, c.b, pos, constants[0]);
frag_color = AF4(c, 1.0);
AF4 c;
FsrRcasF(c.r, c.g, c.b, c.a, pos, constants[0]);
frag_color = c;
#else
AH3 c;
FsrRcasH(c.r, c.g, c.b, pos, constants[0]);
frag_color = AH4(c, 1.0);
AH4 c;
FsrRcasH(c.r, c.g, c.b, c.a, pos, constants[0]);
frag_color = c;
#endif
#endif
}

View File

@@ -9,5 +9,5 @@ layout (location = 0) out vec4 color;
layout (binding = 0) uniform sampler2D color_texture;
void main() {
color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
color = vec4(texture(color_texture, frag_tex_coord));
}

View File

@@ -52,5 +52,5 @@ vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) {
}
void main() {
color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f);
color = textureBicubic(color_texture, frag_tex_coord);
}

View File

@@ -46,14 +46,14 @@ vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) {
}
void main() {
vec3 base = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0];
vec4 base = texture(color_texture, vec2(frag_tex_coord)) * weight[0];
vec2 tex_offset = 1.0f / textureSize(color_texture, 0);
// TODO(Blinkhawk): This code can be optimized through shader group instructions.
vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb;
vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb;
vec3 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset).rgb;
vec3 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb;
vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f);
color = vec4(combination + base, 1.0f);
vec4 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset);
vec4 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset);
vec4 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset);
vec4 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0));
vec4 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f);
color = combination + base;
}

View File

@@ -6,5 +6,6 @@
#define YUZU_USE_FP16
#define USE_EASU 1
#define VERSION 1
#include "fidelityfx_fsr.frag"

View File

@@ -5,5 +5,6 @@
#extension GL_GOOGLE_include_directive : enable
#define USE_EASU 1
#define VERSION 1
#include "fidelityfx_fsr.frag"

View File

@@ -6,5 +6,6 @@
#define YUZU_USE_FP16
#define USE_RCAS 1
#define VERSION 1
#include "fidelityfx_fsr.frag"

View File

@@ -5,5 +5,6 @@
#extension GL_GOOGLE_include_directive : enable
#define USE_RCAS 1
#define VERSION 1
#include "fidelityfx_fsr.frag"

View File

@@ -5,7 +5,7 @@
#extension GL_GOOGLE_include_directive : enable
#define VERSION 1
#define VERSION 2
#define YUZU_USE_FP16
#include "opengl_present_scaleforce.frag"

View File

@@ -5,6 +5,6 @@
#extension GL_GOOGLE_include_directive : enable
#define VERSION 1
#define VERSION 2
#include "opengl_present_scaleforce.frag"

View File

@@ -42,6 +42,8 @@ public:
u64 page_bits_ = 12);
~MemoryManager();
static constexpr bool HAS_FLUSH_INVALIDATION = true;
size_t GetID() const {
return unique_identifier;
}

37 src/video_core/present.h Normal file
View File

@@ -0,0 +1,37 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/settings.h"
static inline Settings::ScalingFilter GetScalingFilter() {
return Settings::values.scaling_filter.GetValue();
}
static inline Settings::AntiAliasing GetAntiAliasing() {
return Settings::values.anti_aliasing.GetValue();
}
static inline Settings::ScalingFilter GetScalingFilterForAppletCapture() {
return Settings::ScalingFilter::Bilinear;
}
static inline Settings::AntiAliasing GetAntiAliasingForAppletCapture() {
return Settings::AntiAliasing::None;
}
struct PresentFilters {
Settings::ScalingFilter (*get_scaling_filter)();
Settings::AntiAliasing (*get_anti_aliasing)();
};
constexpr PresentFilters PresentFiltersForDisplay{
.get_scaling_filter = &GetScalingFilter,
.get_anti_aliasing = &GetAntiAliasing,
};
constexpr PresentFilters PresentFiltersForAppletCapture{
.get_scaling_filter = &GetScalingFilterForAppletCapture,
.get_anti_aliasing = &GetAntiAliasingForAppletCapture,
};
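The two constexpr tables above are consumed through their function pointers, so a presentation path and the applet-capture path can each resolve their scaling filter and anti-aliasing mode at call time. A minimal sketch of such a caller, assuming nothing beyond what present.h declares (the ChooseScalingFilter helper and its is_applet_capture flag are illustrative, not part of this change):
#include "video_core/present.h"
// Sketch: pick the filter set for the current presentation path and read the
// scaling filter through it.
Settings::ScalingFilter ChooseScalingFilter(bool is_applet_capture) {
    const PresentFilters& filters =
        is_applet_capture ? PresentFiltersForAppletCapture : PresentFiltersForDisplay;
    return filters.get_scaling_filter();
}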

View File

@@ -40,6 +40,9 @@ public:
/// Finalize rendering the guest frame and draw into the presentation texture
virtual void Composite(std::span<const Tegra::FramebufferConfig> layers) = 0;
/// Get the tiled applet layer capture buffer
virtual std::vector<u8> GetAppletCaptureBuffer() = 0;
[[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
[[nodiscard]] virtual std::string GetDeviceVendor() const = 0;

Some files were not shown because too many files have changed in this diff.