Compare commits

..

98 Commits

Author SHA1 Message Date
Liam
5b7c0f13d3 fsp_srv: stub GetCacheStorageSize 2022-10-08 12:24:00 -04:00
Mai
155213484b Merge pull request #9016 from liamwhite/drunken-schedule
vk_scheduler: wait for command processing to complete
2022-10-07 20:27:16 -04:00
Mai
b7ad83383f Merge pull request #8932 from abouvier/cmake-pkgconfig
cmake: Fix FindPkgConfig
2022-10-07 20:25:51 -04:00
Mai
6f101e0f02 Merge pull request #9030 from Morph1984/api-disable
configure_graphics: Fix graphics API selection when a game is running
2022-10-07 20:25:23 -04:00
liamwhite
972b93bf00 Merge pull request #8807 from Docteh/default_fonts
Qt: work around Qt5's font choice for Chinese (in Windows)
2022-10-07 17:39:39 -04:00
Morph
1e35ade1ec configure_graphics: Fix graphics API selection when a game is running
The graphics API setting should not be changed when a game is running.
2022-10-07 15:11:26 -04:00
Narr the Reg
b8777b6653 Merge pull request #9028 from liamwhite/wtype-limits
nfp_types: silence -Wtype-limits
2022-10-07 09:03:35 -05:00
Liam
9574429c5f nfp_types: silence -Wtype-limits 2022-10-07 06:52:28 -04:00
bunnei
61883d8820 Merge pull request #6142 from lat9nq/prog_meta_ref_bind_address
program_metadata: Avoid reference binding to misaligned address
2022-10-06 20:42:15 -07:00
bunnei
bb86fc573f Merge pull request #8944 from Tachi107/patch-2
build(room): simplify yuzu-room installation
2022-10-06 16:59:04 -07:00
Fernando S
1effa578f1 Merge pull request #8467 from FernandoS27/yfc-rel-1
Project yuzu Fried Chicken (Y.F.C.) Part 1
2022-10-06 21:29:53 +02:00
Byte
df6dffa30b vulkan_blitter: Fix pool allocation double free. 2022-10-06 21:00:54 +02:00
Liam
aedd739631 maxwell_dma: remove warnings from implemented functionality 2022-10-06 21:00:54 +02:00
Fernando Sahmkow
ca3db0d7c9 General: address feedback 2022-10-06 21:00:54 +02:00
Liam
0d99b7962d state_tracker: workaround channel setup for homebrew 2022-10-06 21:00:54 +02:00
Liam
c80ed6d81f general: rework usages of UNREACHABLE macro 2022-10-06 21:00:54 +02:00
Morph
903705043d nvdisp: End system frame after requesting to swap buffers
Fixes frametime reporting
2022-10-06 21:00:54 +02:00
Morph
11e1cbbdbd address_space: Rename va_start to virt_start
Avoids conflicting with the va_start macro
2022-10-06 21:00:54 +02:00
Morph
fa342cae22 address_space: Address feedback 2022-10-06 21:00:54 +02:00
Morph
fedd983f96 general: Format licenses as per SPDX guidelines 2022-10-06 21:00:54 +02:00
Fernando Sahmkow
d97d409647 NvHostChannels: improve hack for supporting multiple channels. 2022-10-06 21:00:54 +02:00
Fernando Sahmkow
c2b7de66b3 Address Feedback from bylaws. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
8a372035db Nvflinger: correct duplication. 2022-10-06 21:00:53 +02:00
VonChenPlus
9982cff98b Core: Fix get nvmap object random crash 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
fe24c65153 General: Fix clang format. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
1a9b71b1c6 Common: Fix variable shadowing. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
cdce7f781b Vulkan Swapchain: Overall improvements. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
8d774e7415 NvDec: Fix regressions. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
ada09778d9 Vulkan Texture Cache: Limit render area to the max width/height of the targets. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
8fd1d769fe ImageBase: Basic fixes. 2022-10-06 21:00:53 +02:00
Liam White
afab6c143c General: Fix compilation for GCC 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
fd7afda1e8 VideoCore: Implement formats needed for N64 emulation. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
770e19f51a Buffer Cache: Deduce vertex array limit from memory layout when limit is the highest possible. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
8bb604b3be VideoCore: Add option to dump the macros. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
a9ca39f859 NVDRV: Further improvements. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
b59ca4df0c Buffer Cache: Basic fixes. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
98317f2b77 Decoders: Improve overall speed. 2022-10-06 21:00:53 +02:00
bunnei
f5fd6b5c86 DMA & InlineToMemory Engines Rework. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
b2099fbdcc Maxwell3D: Add small_index_2 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
7cfa28a666 Memory Manager: ensure safety of GPU to CPU address. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
5a568b1655 MemoryManager: Fix errors popping out. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
3d02143476 Shader Decompiler: implement better tracking for Vulkan samplers. 2022-10-06 21:00:53 +02:00
Fernando Sahmkow
ba34cf0a69 Shader Decompiler: Check for shift when deriving composite samplers. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
a283eda320 Shader Decompiler: Fix dangerous behavior of invalid iterator insertion. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
359f22b808 MemoryManager: Finish up the initial implementation. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
5caa150e9a OpenGL: Fix TickWork 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
bc8b3d225e VideoCore: Refactor fencing system. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
4d60410dd9 MemoryManager: initial multi paging system implementation. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
98b5e236d4 Vulkan: Fix Scissor on Clears 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
920429fde7 NVDRV: Further refactors and eliminate old code. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
2931101e6f NVDRV: Refactor Host1x 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
668e80a9f4 VideoCore: Refactor syncing. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
e44ac8b821 Texture Cache: Fix GC and GPU Modified on Joins. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
f350c3d74e Texture cache: Fix the remaining issues with memory management and unmapping. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
9cf4c8831d Texture cache: Fix dangling references on multichannel. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
e462191482 Refactor VideoCore to use AS separate from Channel. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
bb74973bba General: Rebase fixes. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
6fc4012396 VideoCore: Extra Fixes. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
feb49c822d NVDRV: Remake ASGPU 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
c6ea0c650e NVDRV: Update copyright notices. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
0f4ae3cc52 MemoryManager: Temporary Fix for NVDEC. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
835b950f7e NvHostCtrl: Fix merge of nvflinger. 2022-10-06 21:00:52 +02:00
Fernando Sahmkow
cbaf3fb433 VideoCore: Update MemoryManager 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
b617874724 Common: implement MultiLevelPageTable. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
3f8e7a5585 VideoCore: Fix channels with disk pipeline/shader cache. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
d7990c159e OpenGl: Implement Channels. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
2c62563ab5 NVHOST_CTRl: Implement missing method and fix some stuffs. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
139ea93512 VideoCore: implement channels on gpu caches. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
c77b8df12e NVASGPU: Fix Remap. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
ad038609c8 NVDRV: Fix clearing when destroying. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
68d9504a04 NVMAP: Fix the Free return parameters. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
af35dbcf63 NVDRV: Fix Open/Close and make sure each device is correctly created. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
de0e8eff42 NVDRV: Implement new NvMap 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
3cbe352c18 NVDRV: Refactor and add new NvMap. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
a21b8824fb NVDRV: Cleanup. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
d30b885d71 NVDRV: Implement QueryEvent. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
39a5ce4e69 NvHost: Remake Ctrl Implementation. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
ac104a24d1 NvHost: Try a different approach to blocking. 2022-10-06 21:00:51 +02:00
Fernando Sahmkow
7b7f6f1cb7 NvHost: Fix some regressions and correct signaling on timeout. 2022-10-06 21:00:51 +02:00
Fernando S
31d4bc6953 Merge pull request #9025 from FernandoS27/slava-ukrayini
Texture Cache: Add ASTC 10x5 Format.
2022-10-06 17:10:28 +02:00
Fernando Sahmkow
1a49991676 Texture Cache: Add ASTC 10x5 Format. 2022-10-06 16:45:40 +02:00
bunnei
d55096ce85 Merge pull request #9013 from liamwhite/spinning-a-yarn
common: remove "yuzu:" prefix from thread names
2022-10-05 18:53:42 -07:00
bunnei
1689530f52 Merge pull request #9015 from german77/amiibo-rewrite
service: nfp: Fix errors to pass unit testing
2022-10-05 14:13:57 -07:00
Kyle Kienapfel
3b5a937125 Show error from cpp-httplib when we don't have a response to read (report errors while connecting to API) (#8999)
Co-authored-by: Kyle Kienapfel <Docteh@users.noreply.github.com>
2022-10-05 21:39:54 +02:00
Fernando S
71fe9fd0f2 Merge pull request #8987 from liamwhite/another-name-for-reinforcement-steel
vulkan: automatically use larger staging buffer sizes when possible
2022-10-05 08:54:22 +02:00
Fernando S
4774e32593 Merge pull request #9011 from liamwhite/frog-emoji-moment
shader_recompiler: add extended LDC to GLASM backend
2022-10-05 08:53:26 +02:00
bunnei
fc0ace6048 Merge pull request #9005 from liamwhite/micro-fit
macro_jit_x64: cancel exit for taken branch
2022-10-04 20:08:02 -07:00
bunnei
92c0ad23eb Merge pull request #9010 from liamwhite/buttwise
macro_jit_x64: fix miscompilation of bit extraction operations
2022-10-04 15:52:39 -07:00
Narr the Reg
e85c19adcb service: nfp: Fix errors to pass unit testing 2022-10-03 18:06:55 -05:00
Liam
35d3e7db2a common: remove "yuzu:" prefix from thread names 2022-10-03 18:43:56 -04:00
Liam
ae7062d522 shader_recompiler: add extended LDC to GLASM backend 2022-10-02 17:32:54 -04:00
Liam
1225627515 macro_jit_x64: fix miscompilation of bit extraction operations 2022-10-01 20:31:21 -04:00
Kyle Kienapfel
1dba5fab62 Qt: work around Qt5's font choice for Chinese
On Windows there are currently two fonts used.

The first, does the Menu, QTreeView and Tooltips
Second is Everything else which is a default font.

From inspecting QApplication::font() at runtime
Windows 10 English: QFont(MS Shell Dlg 2,8.25,-1,5,50,0,0,0,0,0)
Windows 11 Japanese:        MS UI Gothic,9   ,-1,5,50,0,0,0,0,0
Windows 11 Traditional Chinese: PMingLiU,9   ,-1,5,50,0,0,0,0,0
Windows 11 Simplified Chinese:    SimSun,9   ,-1,5,50,0,0,0,0,0
Windows 11 Korean:                 Gulim,9   ,-1,5,50,0,0,0,0,0

I initially investigated dynamically changing the font when
the UI language is English, but this was getting quite messy

Qt6 makes changes to default font in some situations, so this
PR is being narrowed in scope to only affect Chinese font choices.
This change only affects rendering of Latin/Cyrillic characters.
2022-10-01 15:27:23 -07:00
Liam
b80f7faebe macro_jit_x64: cancel exit for taken branch 2022-10-01 01:32:24 -04:00
Liam
087c6c2ef1 vulkan: automatically use larger staging buffer sizes when possible 2022-09-25 02:28:03 -04:00
Andrea Pappacoda
db88eaa346 build(room): simplify yuzu-room installation
CMake is able to automatically install binaries in the correct location. Also see my older patch, af94bf4a59

Cc: @FearlessTobi
2022-09-22 21:51:56 +02:00
Alexandre Bouvier
09a87966e0 cmake: Fix FindPkgConfig 2022-09-20 22:21:52 +02:00
lat9nq
bfb7cbc292 program_metadata: Unpack FileAccessHeader and FileAccessControl
Avoids reference binding to misaligned addresses. Unpacking one
requires unpacking the other, otherwise there'll be a misaligned address
on the leftover one.
2022-02-13 02:20:56 -05:00
205 changed files with 6860 additions and 3380 deletions

View File

@@ -252,7 +252,7 @@ if(ENABLE_QT)
endif()
# Check for headers
Include(FindPkgConfig REQUIRED)
find_package(PkgConfig REQUIRED)
pkg_check_modules(QT_DEP_GLU QUIET glu>=9.0.0)
if (NOT QT_DEP_GLU_FOUND)
message(FATAL_ERROR "Qt bundled pacakge dependency `glu` not found. \
@@ -386,7 +386,7 @@ endif()
# Ensure libusb is properly configured (based on dolphin libusb include)
if(NOT APPLE AND NOT YUZU_USE_BUNDLED_LIBUSB)
include(FindPkgConfig)
find_package(PkgConfig)
if (PKG_CONFIG_FOUND AND NOT CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD")
pkg_check_modules(LIBUSB QUIET libusb-1.0>=1.0.24)
else()
@@ -410,7 +410,7 @@ set(FFmpeg_COMPONENTS
swscale)
if (UNIX AND NOT APPLE)
Include(FindPkgConfig REQUIRED)
find_package(PkgConfig REQUIRED)
pkg_check_modules(LIBVA libva)
endif()
if (NOT YUZU_USE_BUNDLED_FFMPEG)

View File

@@ -43,7 +43,7 @@ if (NOT WIN32)
CACHE PATH "Paths to FFmpeg libraries" FORCE)
endforeach()
Include(FindPkgConfig REQUIRED)
find_package(PkgConfig REQUIRED)
pkg_check_modules(LIBVA libva)
pkg_check_modules(CUDA cuda)
pkg_check_modules(FFNVCODEC ffnvcodec)

View File

@@ -108,7 +108,7 @@ if (MINGW OR (${CMAKE_SYSTEM_NAME} MATCHES "Linux") OR APPLE)
target_include_directories(usb INTERFACE "${LIBUSB_INCLUDE_DIRS}")
if (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
Include(FindPkgConfig)
find_package(PkgConfig)
pkg_check_modules(LIBUDEV REQUIRED libudev)
if (LIBUDEV_FOUND)

View File

@@ -121,6 +121,7 @@ else()
if (ARCHITECTURE_x86_64)
add_compile_options("-mcx16")
add_compile_options("-fwrapv")
endif()
if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)

View File

@@ -132,7 +132,7 @@ void AudioRenderer::CreateSinkStreams() {
}
void AudioRenderer::ThreadFunc() {
constexpr char name[]{"yuzu:AudioRenderer"};
constexpr char name[]{"AudioRenderer"};
MicroProfileOnThreadCreate(name);
Common::SetCurrentThreadName(name);
Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);

View File

@@ -94,7 +94,7 @@ bool SystemManager::Remove(System& system_) {
}
void SystemManager::ThreadFunc() {
constexpr char name[]{"yuzu:AudioRenderSystemManager"};
constexpr char name[]{"AudioRenderSystemManager"};
MicroProfileOnThreadCreate(name);
Common::SetCurrentThreadName(name);
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);

View File

@@ -17,6 +17,8 @@ endif ()
include(GenerateSCMRev)
add_library(common STATIC
address_space.cpp
address_space.h
algorithm.h
alignment.h
announce_multiplayer_room.h
@@ -81,6 +83,8 @@ add_library(common STATIC
microprofile.cpp
microprofile.h
microprofileui.h
multi_level_page_table.cpp
multi_level_page_table.h
nvidia_flags.cpp
nvidia_flags.h
page_table.cpp

View File

@@ -0,0 +1,10 @@
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#include "common/address_space.inc"
namespace Common {
// Explicit instantiation of FlatAllocator with VaType = u32, UnmappedVa = 0 and
// AddressSpaceBits = 32; the member definitions come from the address_space.inc include above.
template class Common::FlatAllocator<u32, 0, 32>;
}

150
src/common/address_space.h Normal file
View File

@@ -0,0 +1,150 @@
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <concepts>
#include <functional>
#include <mutex>
#include <vector>
#include "common/common_types.h"
namespace Common {
/// Satisfied when VaType is an unsigned type whose size in bits (computed as sizeof(VaType) * 8)
/// is at least AddressSpaceBits
template <typename VaType, size_t AddressSpaceBits>
concept AddressSpaceValid = std::is_unsigned_v<VaType> && sizeof(VaType) * 8 >= AddressSpaceBits;
/// Empty placeholder type, used as the default ExtraBlockInfo argument of FlatAddressSpaceMap
struct EmptyStruct {};
/**
* @brief FlatAddressSpaceMap provides a generic VA->PA mapping implementation using a sorted vector
* @tparam VaType Unsigned integer type used for virtual addresses
* @tparam UnmappedVa VA value a default-constructed Block starts with; Block::Valid() compares
* against it
* @tparam PaType Type used for the physical side of a mapping
* @tparam UnmappedPa PaType value that marks a block as unmapped
* @tparam PaContigSplit Selects how MapLocked/UnmapLocked compute the phys of a tail block when an
* existing block is split: offset by the distance into the block (true) or copied as-is (false)
* @tparam AddressSpaceBits Width of the address space in bits
* @tparam ExtraBlockInfo Additional per-block payload stored next to virt/phys
*/
template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa,
bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo = EmptyStruct>
requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatAddressSpaceMap {
public:
/// The maximum VA that this AS can technically reach
/// (equals 2^AddressSpaceBits - 1; presumably written as two half-range terms so that a full-width
/// AddressSpaceBits never shifts 1ULL by its whole bit width)
static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) +
((1ULL << (AddressSpaceBits - 1)) - 1)};
/// Creates a map with the given soft VA limit and an optional callback that is invoked with
/// (virt, size) by MapLocked/UnmapLocked
explicit FlatAddressSpaceMap(VaType va_limit,
std::function<void(VaType, VaType)> unmap_callback = {});
/// Creates a map with va_limit set to VaMaximum and no callback
FlatAddressSpaceMap() = default;
/// Locks block_mutex and maps [virt, virt + size) to phys via MapLocked
void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info = {}) {
std::scoped_lock lock(block_mutex);
MapLocked(virt, phys, size, extra_info);
}
/// Locks block_mutex and unmaps [virt, virt + size) via UnmapLocked
void Unmap(VaType virt, VaType size) {
std::scoped_lock lock(block_mutex);
UnmapLocked(virt, size);
}
/// Returns the soft VA limit of this address space
VaType GetVALimit() const {
return va_limit;
}
protected:
/**
* @brief Represents a block of memory in the AS, the physical mapping is contiguous until
* another block with a different phys address is hit
*/
struct Block {
/// VA of the block
VaType virt{UnmappedVa};
/// PA of the block, will increase 1-1 with VA until a new block is encountered
PaType phys{UnmappedPa};
/// Caller-supplied payload; [[no_unique_address]] lets an empty type take no storage
[[no_unique_address]] ExtraBlockInfo extra_info;
Block() = default;
Block(VaType virt_, PaType phys_, ExtraBlockInfo extra_info_)
: virt(virt_), phys(phys_), extra_info(extra_info_) {}
/// True when virt differs from the UnmappedVa sentinel
bool Valid() const {
return virt != UnmappedVa;
}
/// True when phys differs from the UnmappedPa sentinel
bool Mapped() const {
return phys != UnmappedPa;
}
/// True when phys equals the UnmappedPa sentinel
bool Unmapped() const {
return phys == UnmappedPa;
}
/// Orders a block against a bare VA so std::lower_bound can search blocks by address
bool operator<(const VaType& p_virt) const {
return virt < p_virt;
}
};
/**
* @brief Maps a PA range into the given AS region
* @note block_mutex MUST be locked when calling this
*/
void MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info);
/**
* @brief Unmaps the given range and merges it with other unmapped regions
* @note block_mutex MUST be locked when calling this
*/
void UnmapLocked(VaType virt, VaType size);
/// Guards blocks; taken by Map/Unmap before they call the *Locked variants
std::mutex block_mutex;
/// Blocks sorted by virt; starts out holding a single default (unmapped) block
std::vector<Block> blocks{Block{}};
/// a soft limit on the maximum VA of the AS
VaType va_limit{VaMaximum};
private:
/// Callback called when the mappings in a region have changed
std::function<void(VaType, VaType)> unmap_callback{};
};
/**
* @brief FlatAllocator specialises FlatAddressSpaceMap to work as an allocator, with an
* initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block
* @note The base map is instantiated with PaType = bool and UnmappedPa = false, so a block's phys
* only records whether the region is allocated (true) or free (false)
*/
template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>
requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatAllocator
: public FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits> {
private:
using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>;
public:
/// Creates an allocator whose linear pass starts at virt_start and whose soft limit is va_limit
explicit FlatAllocator(VaType virt_start, VaType va_limit = Base::VaMaximum);
/**
* @brief Allocates a region in the AS of the given size and returns its address
*/
VaType Allocate(VaType size);
/**
* @brief Marks the given region in the AS as allocated
*/
void AllocateFixed(VaType virt, VaType size);
/**
* @brief Frees an AS region so it can be used again
*/
void Free(VaType virt, VaType size);
/// Returns the base VA that was passed to the constructor
VaType GetVAStart() const {
return virt_start;
}
private:
/// The base VA of the allocator, no allocations will be below this
VaType virt_start;
/**
* The end address for the initial linear allocation pass
* Once this reaches the AS limit the slower allocation path will be used
*/
VaType current_linear_alloc_end;
};
} // namespace Common

View File

@@ -0,0 +1,366 @@
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#include "common/address_space.h"
#include "common/assert.h"
// Helper macros that spell out the template header, the requires-clause and the qualified class
// name for out-of-line member definitions, e.g. `MAP_MEMBER(void)::MapLocked(...)`.
// NOTE(review): none of these macros is #undef'd in the visible part of this file.

// Member of FlatAddressSpaceMap with the given return type
#define MAP_MEMBER(returnType) \
template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, \
bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> \
requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap< \
VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
// Constructor of FlatAddressSpaceMap (no return type)
#define MAP_MEMBER_CONST() \
template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, \
bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> \
requires AddressSpaceValid<VaType, AddressSpaceBits> FlatAddressSpaceMap< \
VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
// NOTE(review): expands to FlatMemoryManager, a name that address_space.h does not declare, and
// the macro is not used by any definition in this file — looks like a leftover; confirm before
// removing.
#define MM_MEMBER(returnType) \
template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \
requires AddressSpaceValid<VaType, AddressSpaceBits> returnType \
FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits>
// Member of FlatAllocator with the given return type
#define ALLOC_MEMBER(returnType) \
template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \
requires AddressSpaceValid<VaType, AddressSpaceBits> returnType \
FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
// Constructor of FlatAllocator (no return type)
#define ALLOC_MEMBER_CONST() \
template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \
requires AddressSpaceValid<VaType, AddressSpaceBits> \
FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
namespace Common {
/// Stores the soft VA limit and the change callback, then asserts that the limit does not exceed
/// the largest address the address space can represent (VaMaximum).
MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType va_limit_,
                                        std::function<void(VaType, VaType)> unmap_callback_)
    : va_limit{va_limit_}, unmap_callback{std::move(unmap_callback_)} {
    const bool limit_in_range{va_limit <= VaMaximum};
    if (!limit_in_range) {
        ASSERT_MSG(false, "Invalid VA limit!");
    }
}
/**
 * @brief Maps [virt, virt + size) to phys, replacing whatever blocks previously covered the range
 * @param virt Start VA of the region
 * @param phys PA the region maps to
 * @param size Length of the region
 * @param extra_info Payload stored in the block that starts at virt
 * @note block_mutex MUST be locked when calling this
 * @note unmap_callback (if set) is invoked with (virt, size) on every path that updates the map
 */
MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extra_info) {
    VaType virt_end{virt + size};

    if (virt_end > va_limit) {
        ASSERT_MSG(false,
                   "Trying to map a block past the VA limit: virt_end: 0x{:X}, va_limit: 0x{:X}",
                   virt_end, va_limit);
    }

    // First block whose virt is >= virt_end
    auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)};
    if (block_end_successor == blocks.begin()) {
        ASSERT_MSG(false, "Trying to map a block before the VA start: virt_end: 0x{:X}", virt_end);
    }

    auto block_end_predecessor{std::prev(block_end_successor)};

    if (block_end_successor != blocks.end()) {
        // We have blocks in front of us, if one is directly in front then we don't have to add a
        // tail
        if (block_end_successor->virt != virt_end) {
            PaType tail_phys{[&]() -> PaType {
                if constexpr (!PaContigSplit) {
                    // Without contiguous splitting the tail simply inherits the predecessor's phys
                    return block_end_predecessor->phys;
                } else {
                    if (block_end_predecessor->Unmapped()) {
                        // Always propagate unmapped regions rather than calculating offset
                        return block_end_predecessor->phys;
                    } else {
                        return block_end_predecessor->phys + virt_end - block_end_predecessor->virt;
                    }
                }
            }()};

            if (block_end_predecessor->virt >= virt) {
                // If this block's start would be overlapped by the map then reuse it as a tail
                // block. Its extra_info is deliberately left untouched: the tail still describes
                // the remainder of the old mapping.
                block_end_predecessor->virt = virt_end;
                block_end_predecessor->phys = tail_phys;

                // No longer predecessor anymore
                block_end_successor = block_end_predecessor--;
            } else {
                // Else insert a new one and we're done
                blocks.insert(block_end_successor,
                              {Block(virt, phys, extra_info),
                               Block(virt_end, tail_phys, block_end_predecessor->extra_info)});
                if (unmap_callback) {
                    unmap_callback(virt, size);
                }

                return;
            }
        }
    } else {
        // block_end_predecessor will always be unmapped as blocks has to be terminated by an
        // unmapped chunk
        if (block_end_predecessor != blocks.begin() && block_end_predecessor->virt >= virt) {
            // Move the unmapped block start backwards
            block_end_predecessor->virt = virt_end;

            // No longer predecessor anymore
            block_end_successor = block_end_predecessor--;
        } else {
            // Else insert a new one and we're done
            blocks.insert(block_end_successor,
                          {Block(virt, phys, extra_info), Block(virt_end, UnmappedPa, {})});
            if (unmap_callback) {
                unmap_callback(virt, size);
            }

            return;
        }
    }

    auto block_start_successor{block_end_successor};

    // Walk the block vector to find the start successor as this is more efficient than another
    // binary search in most scenarios
    while (std::prev(block_start_successor)->virt >= virt) {
        block_start_successor--;
    }

    // Check that the start successor is either the end block or something in between
    if (block_start_successor->virt > virt_end) {
        ASSERT_MSG(false, "Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt);
    } else if (block_start_successor->virt == virt_end) {
        // We need to create a new block as there are none spare that we would overwrite
        blocks.insert(block_start_successor, Block(virt, phys, extra_info));
    } else {
        // Erase overwritten blocks
        if (auto erase_start{std::next(block_start_successor)};
            erase_start != block_end_successor) {
            blocks.erase(erase_start, block_end_successor);
        }

        // Reuse a block that would otherwise be overwritten as a start block
        block_start_successor->virt = virt;
        block_start_successor->phys = phys;
        block_start_successor->extra_info = extra_info;
    }

    if (unmap_callback) {
        unmap_callback(virt, size);
    }
}
/**
 * @brief Unmaps [virt, virt + size) and merges the result with neighbouring unmapped regions
 * @param virt Start VA of the region
 * @param size Length of the region
 * @note block_mutex MUST be locked when calling this
 * @note unmap_callback (if set) is invoked with (virt, size) on every path that completes the
 * unmap; it is not invoked when the map is found in an unexpected state
 */
MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
    VaType virt_end{virt + size};

    if (virt_end > va_limit) {
        ASSERT_MSG(false,
                   "Trying to unmap a block past the VA limit: virt_end: 0x{:X}, "
                   "va_limit: 0x{:X}",
                   virt_end, va_limit);
    }

    // First block whose virt is >= virt_end
    auto block_end_successor{std::lower_bound(blocks.begin(), blocks.end(), virt_end)};
    if (block_end_successor == blocks.begin()) {
        ASSERT_MSG(false, "Trying to unmap a block before the VA start: virt_end: 0x{:X}",
                   virt_end);
    }

    auto block_end_predecessor{std::prev(block_end_successor)};

    // Steps backwards from iter to the last block that starts strictly before virt
    auto walk_back_to_predecessor{[&](auto iter) {
        while (iter->virt >= virt) {
            iter--;
        }

        return iter;
    }};

    // Erases every block inside the region when the block at unmapped_end is already unmapped
    auto erase_blocks_with_end_unmapped{[&](auto unmapped_end) {
        auto block_start_predecessor{walk_back_to_predecessor(unmapped_end)};
        auto block_start_successor{std::next(block_start_predecessor)};

        auto erase_end{[&]() {
            if (block_start_predecessor->Unmapped()) {
                // If the start predecessor is unmapped then we can erase everything in our region
                // and be done
                return std::next(unmapped_end);
            } else {
                // Else reuse the end predecessor as the start of our unmapped region then erase all
                // up to it
                unmapped_end->virt = virt;
                return unmapped_end;
            }
        }()};

        // We can't have two unmapped regions after each other
        if (erase_end != blocks.end() &&
            (erase_end == block_start_successor ||
             (block_start_predecessor->Unmapped() && erase_end->Unmapped()))) {
            ASSERT_MSG(false, "Multiple contiguous unmapped regions are unsupported!");
        }

        blocks.erase(block_start_successor, erase_end);
    }};

    // We can avoid any splitting logic if these are the case
    if (block_end_predecessor->Unmapped()) {
        if (block_end_predecessor->virt > virt) {
            erase_blocks_with_end_unmapped(block_end_predecessor);
        }

        if (unmap_callback) {
            unmap_callback(virt, size);
        }

        return; // The region is unmapped, bail out early
    } else if (block_end_successor == blocks.end()) {
        // This should never happen as the end should always follow an unmapped block.
        // This check has to come before any dereference of block_end_successor, and we bail out
        // afterwards because the remaining logic would walk back from the end iterator.
        ASSERT_MSG(false, "Unexpected Memory Manager state!");
        return;
    } else if (block_end_successor->virt == virt_end && block_end_successor->Unmapped()) {
        erase_blocks_with_end_unmapped(block_end_successor);

        if (unmap_callback) {
            unmap_callback(virt, size);
        }

        return; // The region is unmapped here and doesn't need splitting, bail out early
    } else if (block_end_successor->virt != virt_end) {
        // If one block is directly in front then we don't have to add a tail

        // The previous block is mapped so we will need to add a tail with an offset
        PaType tail_phys{[&]() {
            if constexpr (PaContigSplit) {
                return block_end_predecessor->phys + virt_end - block_end_predecessor->virt;
            } else {
                return block_end_predecessor->phys;
            }
        }()};

        if (block_end_predecessor->virt >= virt) {
            // If this block's start would be overlapped by the unmap then reuse it as a tail block
            block_end_predecessor->virt = virt_end;
            block_end_predecessor->phys = tail_phys;

            // No longer predecessor anymore
            block_end_successor = block_end_predecessor--;
        } else {
            blocks.insert(block_end_successor,
                          {Block(virt, UnmappedPa, {}),
                           Block(virt_end, tail_phys, block_end_predecessor->extra_info)});
            if (unmap_callback) {
                unmap_callback(virt, size);
            }

            // The previous block is mapped and ends before
            return;
        }
    }

    // Walk the block vector to find the start predecessor as this is more efficient than another
    // binary search in most scenarios
    auto block_start_predecessor{walk_back_to_predecessor(block_end_successor)};
    auto block_start_successor{std::next(block_start_predecessor)};

    if (block_start_successor->virt > virt_end) {
        ASSERT_MSG(false, "Unsorted block in AS map: virt: 0x{:X}", block_start_successor->virt);
    } else if (block_start_successor->virt == virt_end) {
        // There are no blocks between the start and the end that would let us skip inserting a new
        // one for head

        // The previous block may be unmapped, if so we don't need to insert any unmaps after it
        if (block_start_predecessor->Mapped()) {
            blocks.insert(block_start_successor, Block(virt, UnmappedPa, {}));
        }
    } else if (block_start_predecessor->Unmapped()) {
        // If the previous block is unmapped
        blocks.erase(block_start_successor, block_end_predecessor);
    } else {
        // Erase overwritten blocks, skipping the first one as we have written the unmapped start
        // block there
        if (auto erase_start{std::next(block_start_successor)};
            erase_start != block_end_successor) {
            blocks.erase(erase_start, block_end_successor);
        }

        // Add in the unmapped block header
        block_start_successor->virt = virt;
        block_start_successor->phys = UnmappedPa;
    }

    if (unmap_callback) {
        unmap_callback(virt, size);
    }
}
/// Forwards va_limit_ to the base map, records virt_start_ as the allocator base and starts the
/// linear allocation cursor (current_linear_alloc_end) at virt_start_.
ALLOC_MEMBER_CONST()::FlatAllocator(VaType virt_start_, VaType va_limit_)
: Base{va_limit_}, virt_start{virt_start_}, current_linear_alloc_end{virt_start_} {}
/**
 * @brief Allocates a region of the given size and returns its start VA
 * @param size Length of the region to allocate
 * @return The start VA of the new region, or a value-initialised VaType when no gap was found
 * @note Holds block_mutex for the whole call. A linear pass starting at current_linear_alloc_end
 * is tried first; if it yields nothing, the block list is scanned from the beginning for a gap.
 */
ALLOC_MEMBER(VaType)::Allocate(VaType size) {
std::scoped_lock lock(this->block_mutex);
// UnmappedVa doubles as the "nothing found yet" marker for alloc_start
VaType alloc_start{UnmappedVa};
VaType alloc_end{current_linear_alloc_end + size};
// Avoid searching backwards in the address space if possible
// (the first comparison rejects the case where adding size wrapped around VaType)
if (alloc_end >= current_linear_alloc_end && alloc_end <= this->va_limit) {
auto alloc_end_successor{
std::lower_bound(this->blocks.begin(), this->blocks.end(), alloc_end)};
if (alloc_end_successor == this->blocks.begin()) {
ASSERT_MSG(false, "First block in AS map is invalid!");
}
auto alloc_end_predecessor{std::prev(alloc_end_successor)};
if (alloc_end_predecessor->virt <= current_linear_alloc_end) {
// No block starts inside the candidate range, so take it as-is
alloc_start = current_linear_alloc_end;
} else {
// Skip over any fixed mappings in front of us
// NOTE(review): the condition below selects the predecessor when the gap is too small OR the
// predecessor is mapped, which is the same condition the fallback search further down uses to
// *skip* a block — confirm the intended polarity.
while (alloc_end_successor != this->blocks.end()) {
if (alloc_end_successor->virt - alloc_end_predecessor->virt < size ||
alloc_end_predecessor->Mapped()) {
alloc_start = alloc_end_predecessor->virt;
break;
}
alloc_end_predecessor = alloc_end_successor++;
// Use the VA limit to calculate if we can fit in the final block since it has no
// successor
if (alloc_end_successor == this->blocks.end()) {
alloc_end = alloc_end_predecessor->virt + size;
if (alloc_end >= alloc_end_predecessor->virt && alloc_end <= this->va_limit) {
alloc_start = alloc_end_predecessor->virt;
}
}
}
}
}
if (alloc_start != UnmappedVa) {
// The linear pass succeeded; advance the cursor past the new region
current_linear_alloc_end = alloc_start + size;
} else { // If linear allocation overflows the AS then find a gap
if (this->blocks.size() <= 2) {
ASSERT_MSG(false, "Unexpected allocator state!");
}
auto search_predecessor{this->blocks.begin()};
auto search_successor{std::next(search_predecessor)};
// Advance until an unmapped block whose distance to the next block is at least size
while (search_successor != this->blocks.end() &&
(search_successor->virt - search_predecessor->virt < size ||
search_predecessor->Mapped())) {
search_predecessor = search_successor++;
}
if (search_successor != this->blocks.end()) {
alloc_start = search_predecessor->virt;
} else {
return {}; // AS is full
}
}
// Record the region as allocated (phys == true) in the underlying map
this->MapLocked(alloc_start, true, size, {});
return alloc_start;
}
/// Marks [virt, virt + size) as allocated by mapping it with phys == true through the locking
/// Map() of the base class.
ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
this->Map(virt, true, size);
}
/// Releases [virt, virt + size) by unmapping it through the locking Unmap() of the base class.
ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
this->Unmap(virt, size);
}
} // namespace Common

View File

@@ -24,4 +24,12 @@ template <class ForwardIt, class T, class Compare = std::less<>>
return first != last && !comp(value, *first) ? first : last;
}
/**
 * Folds the trailing arguments into an accumulator that starts at initial_value.
 * The arguments are visited in the order they were passed, and for each one the accumulator is
 * replaced by func(accumulator, argument).
 *
 * @param initial_value Starting value of the accumulator
 * @param func Callable taking (accumulator, argument) and returning the new accumulator
 * @param args Values to fold; with no values the initial value is returned unchanged
 * @return The accumulator after every argument has been applied
 */
template <typename T, typename Func, typename... Args>
T FoldRight(T initial_value, Func&& func, Args&&... args) {
    T accumulator{initial_value};
    // Each argument is received by value before it is handed to func
    const auto apply_step = [&accumulator, &func](auto element) {
        accumulator = func(accumulator, element);
    };
    (apply_step(std::forward<Args>(args)), ...);
    return accumulator;
}
} // namespace Common

View File

@@ -18,4 +18,11 @@ struct PairHash {
}
};
/// Hash functor whose result is the input value itself, converted to size_t with static_cast.
template <typename T>
struct IdentityHash {
    [[nodiscard]] size_t operator()(T value) const noexcept {
        const auto as_size = static_cast<size_t>(value);
        return as_size;
    }
};
} // namespace Common

View File

@@ -219,7 +219,7 @@ private:
void StartBackendThread() {
backend_thread = std::jthread([this](std::stop_token stop_token) {
Common::SetCurrentThreadName("yuzu:Log");
Common::SetCurrentThreadName("Logger");
Entry entry;
const auto write_logs = [this, &entry]() {
ForEachBackend([&entry](Backend& backend) { backend.Write(entry); });

View File

@@ -0,0 +1,9 @@
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/multi_level_page_table.inc"
namespace Common {
// The member functions of MultiLevelPageTable are defined in multi_level_page_table.inc
// (included above), so the template is explicitly instantiated here for the entry types
// that are supported: u64 and u32.
template class Common::MultiLevelPageTable<u64>;
template class Common::MultiLevelPageTable<u32>;
} // namespace Common

View File

@@ -0,0 +1,78 @@
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <type_traits>
#include <utility>
#include <vector>
#include "common/common_types.h"
namespace Common {
/**
 * Page table of `BaseAddr` entries that lives in one large virtual memory region and is
 * populated one first-level chunk at a time via ReserveRange().
 *
 * The table owns the region pointed to by `base_ptr` (released in the destructor), so it is
 * move-only. The out-of-line members are defined in multi_level_page_table.inc.
 */
template <typename BaseAddr>
class MultiLevelPageTable final {
public:
    /// Creates an empty table that owns no memory.
    constexpr MultiLevelPageTable() = default;

    /// Creates a table covering `address_space_bits` bits of address space with pages of
    /// `page_bits` bits, split into 2^`first_level_bits` first-level chunks.
    explicit MultiLevelPageTable(std::size_t address_space_bits, std::size_t first_level_bits,
                                 std::size_t page_bits);

    ~MultiLevelPageTable() noexcept;

    MultiLevelPageTable(const MultiLevelPageTable&) = delete;
    MultiLevelPageTable& operator=(const MultiLevelPageTable&) = delete;

    /// Takes over the state of `other`, leaving it empty.
    MultiLevelPageTable(MultiLevelPageTable&& other) noexcept
        : address_space_bits{std::exchange(other.address_space_bits, 0)},
          first_level_bits{std::exchange(other.first_level_bits, 0)},
          page_bits{std::exchange(other.page_bits, 0)},
          first_level_shift{std::exchange(other.first_level_shift, 0)},
          first_level_chunk_size{std::exchange(other.first_level_chunk_size, 0)},
          // alloc_size must travel with base_ptr: the destructor needs it to release the region.
          alloc_size{std::exchange(other.alloc_size, 0)},
          first_level_map{std::move(other.first_level_map)},
          base_ptr{std::exchange(other.base_ptr, nullptr)} {}

    /// Releases whatever this table currently owns, then takes over the state of `other`,
    /// leaving it empty. Self-assignment is a no-op.
    MultiLevelPageTable& operator=(MultiLevelPageTable&& other) noexcept {
        if (this != &other) {
            // Park the current state in a temporary so its destructor frees the old region
            // instead of it being overwritten and leaked.
            MultiLevelPageTable released{std::move(*this)};

            address_space_bits = std::exchange(other.address_space_bits, 0);
            first_level_bits = std::exchange(other.first_level_bits, 0);
            page_bits = std::exchange(other.page_bits, 0);
            first_level_shift = std::exchange(other.first_level_shift, 0);
            first_level_chunk_size = std::exchange(other.first_level_chunk_size, 0);
            alloc_size = std::exchange(other.alloc_size, 0);
            first_level_map = std::move(other.first_level_map);
            base_ptr = std::exchange(other.base_ptr, nullptr);
        }
        return *this;
    }

    /// Makes sure every first-level chunk touched by `size` bytes of address space starting
    /// at `start` has been allocated.
    void ReserveRange(u64 start, std::size_t size);

    /// Unchecked access to entry `index`, counted from the start of the table.
    [[nodiscard]] const BaseAddr& operator[](std::size_t index) const {
        return base_ptr[index];
    }

    /// Unchecked access to entry `index`, counted from the start of the table.
    [[nodiscard]] BaseAddr& operator[](std::size_t index) {
        return base_ptr[index];
    }

    /// Pointer to the first entry; null for an empty table.
    [[nodiscard]] BaseAddr* data() {
        return base_ptr;
    }

    /// Pointer to the first entry; null for an empty table.
    [[nodiscard]] const BaseAddr* data() const {
        return base_ptr;
    }

private:
    /// Allocates the first-level chunk with index `level`.
    void AllocateLevel(u64 level);

    std::size_t address_space_bits{};
    std::size_t first_level_bits{};
    std::size_t page_bits{};
    std::size_t first_level_shift{};      ///< address_space_bits - first_level_bits
    std::size_t first_level_chunk_size{}; ///< Size in bytes of one first-level chunk
    std::size_t alloc_size{};             ///< Size in bytes of the whole region at base_ptr
    std::vector<void*> first_level_map{}; ///< Non-null for chunks that have been allocated
    BaseAddr* base_ptr{};
};
} // namespace Common

View File

@@ -0,0 +1,84 @@
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#endif
#include "common/assert.h"
#include "common/multi_level_page_table.h"
namespace Common {
/// Computes the table geometry and obtains the virtual memory region that backs the table.
/// A `page_bits_` of zero produces an empty table that owns no memory.
template <typename BaseAddr>
MultiLevelPageTable<BaseAddr>::MultiLevelPageTable(std::size_t address_space_bits_,
                                                   std::size_t first_level_bits_,
                                                   std::size_t page_bits_)
    : address_space_bits{address_space_bits_},
      first_level_bits{first_level_bits_}, page_bits{page_bits_} {
    if (page_bits == 0) {
        return;
    }

    // Address bits left over once the first-level index has been stripped off.
    first_level_shift = address_space_bits - first_level_bits;

    // One entry per page: a chunk spans 2^(first_level_shift - page_bits) entries, the whole
    // table 2^(address_space_bits - page_bits) entries.
    const std::size_t chunk_entry_bits = first_level_shift - page_bits;
    const std::size_t total_entry_bits = address_space_bits - page_bits;
    first_level_chunk_size = (1ULL << chunk_entry_bits) * sizeof(BaseAddr);
    alloc_size = (1ULL << total_entry_bits) * sizeof(BaseAddr);

    const std::size_t first_level_entries = 1ULL << first_level_bits;
    first_level_map.resize(first_level_entries, nullptr);

#ifdef _WIN32
    // Reserve address space only; chunks are committed on demand.
    void* base{VirtualAlloc(nullptr, alloc_size, MEM_RESERVE, PAGE_READWRITE)};
#else
    void* base{
        mmap(nullptr, alloc_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)};
    // Normalise the failure value so both platforms can share the null check below.
    if (base == MAP_FAILED) {
        base = nullptr;
    }
#endif
    ASSERT(base);
    base_ptr = static_cast<BaseAddr*>(base);
}
/// Releases the region backing the table. Empty tables (null base_ptr) own nothing.
template <typename BaseAddr>
MultiLevelPageTable<BaseAddr>::~MultiLevelPageTable() noexcept {
    if (base_ptr) {
#ifdef _WIN32
        // MEM_RELEASE requires a size of 0 and frees the entire reservation.
        ASSERT(VirtualFree(base_ptr, 0, MEM_RELEASE));
#else
        ASSERT(munmap(base_ptr, alloc_size) == 0);
#endif
    }
}
/// Ensures that every first-level chunk overlapped by the address range
/// [start, start + size) has been allocated. An empty range reserves nothing.
template <typename BaseAddr>
void MultiLevelPageTable<BaseAddr>::ReserveRange(u64 start, std::size_t size) {
    if (size == 0) {
        return;
    }

    const u64 first_chunk = start >> first_level_shift;
    // Use the last address *inside* the range. Shifting the exclusive end instead would touch
    // one chunk too many whenever the range ends on a chunk boundary, and would index past
    // the end of first_level_map when the range ends at the top of the address space.
    const u64 last_chunk = (start + size - 1) >> first_level_shift;
    ASSERT(last_chunk < first_level_map.size());

    for (u64 chunk = first_chunk; chunk <= last_chunk; ++chunk) {
        if (!first_level_map[chunk]) {
            AllocateLevel(chunk);
        }
    }
}
/// Makes the first-level chunk with index `level` usable and records it in first_level_map.
template <typename BaseAddr>
void MultiLevelPageTable<BaseAddr>::AllocateLevel(u64 level) {
    void* ptr = reinterpret_cast<char*>(base_ptr) + level * first_level_chunk_size;
#ifdef _WIN32
    // The constructor only reserved address space; commit this chunk's pages now.
    void* base{VirtualAlloc(ptr, first_level_chunk_size, MEM_COMMIT, PAGE_READWRITE)};
#else
    // The constructor already mapped the whole table read/write, so the chunk is usable as
    // it is. Calling mmap() here with `ptr` as a mere hint (no MAP_FIXED) cannot land on the
    // already-mapped range: the kernel would place a fresh mapping somewhere else, which the
    // destructor never unmaps.
    void* base{ptr};
#endif
    ASSERT(base);
    first_level_map[level] = base;
}
} // namespace Common

View File

@@ -138,8 +138,6 @@ add_library(core STATIC
frontend/emu_window.h
frontend/framebuffer_layout.cpp
frontend/framebuffer_layout.h
hardware_interrupt_manager.cpp
hardware_interrupt_manager.h
hid/emulated_console.cpp
hid/emulated_console.h
hid/emulated_controller.cpp
@@ -550,6 +548,12 @@ add_library(core STATIC
hle/service/ns/ns.h
hle/service/ns/pdm_qry.cpp
hle/service/ns/pdm_qry.h
hle/service/nvdrv/core/container.cpp
hle/service/nvdrv/core/container.h
hle/service/nvdrv/core/nvmap.cpp
hle/service/nvdrv/core/nvmap.h
hle/service/nvdrv/core/syncpoint_manager.cpp
hle/service/nvdrv/core/syncpoint_manager.h
hle/service/nvdrv/devices/nvdevice.h
hle/service/nvdrv/devices/nvdisp_disp0.cpp
hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -578,8 +582,6 @@ add_library(core STATIC
hle/service/nvdrv/nvdrv_interface.h
hle/service/nvdrv/nvmemp.cpp
hle/service/nvdrv/nvmemp.h
hle/service/nvdrv/syncpoint_manager.cpp
hle/service/nvdrv/syncpoint_manager.h
hle/service/nvflinger/binder.h
hle/service/nvflinger/buffer_item.h
hle/service/nvflinger/buffer_item_consumer.cpp

View File

@@ -27,7 +27,6 @@
#include "core/file_sys/savedata_factory.h"
#include "core/file_sys/vfs_concat.h"
#include "core/file_sys/vfs_real.h"
#include "core/hardware_interrupt_manager.h"
#include "core/hid/hid_core.h"
#include "core/hle/kernel/k_memory_manager.h"
#include "core/hle/kernel/k_process.h"
@@ -51,6 +50,7 @@
#include "core/telemetry_session.h"
#include "core/tools/freezer.h"
#include "network/network.h"
#include "video_core/host1x/host1x.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
@@ -215,6 +215,7 @@ struct System::Impl {
telemetry_session = std::make_unique<Core::TelemetrySession>();
host1x_core = std::make_unique<Tegra::Host1x::Host1x>(system);
gpu_core = VideoCore::CreateGPU(emu_window, system);
if (!gpu_core) {
return SystemResultStatus::ErrorVideoCore;
@@ -224,7 +225,6 @@ struct System::Impl {
service_manager = std::make_shared<Service::SM::ServiceManager>(kernel);
services = std::make_unique<Service::Services>(service_manager, system);
interrupt_manager = std::make_unique<Hardware::InterruptManager>(system);
// Initialize time manager, which must happen after kernel is created
time_manager.Initialize();
@@ -373,6 +373,7 @@ struct System::Impl {
app_loader.reset();
audio_core.reset();
gpu_core.reset();
host1x_core.reset();
perf_stats.reset();
kernel.Shutdown();
memory.Reset();
@@ -450,7 +451,7 @@ struct System::Impl {
/// AppLoader used to load the current executing application
std::unique_ptr<Loader::AppLoader> app_loader;
std::unique_ptr<Tegra::GPU> gpu_core;
std::unique_ptr<Hardware::InterruptManager> interrupt_manager;
std::unique_ptr<Tegra::Host1x::Host1x> host1x_core;
std::unique_ptr<Core::DeviceMemory> device_memory;
std::unique_ptr<AudioCore::AudioCore> audio_core;
Core::Memory::Memory memory;
@@ -668,12 +669,12 @@ const Tegra::GPU& System::GPU() const {
return *impl->gpu_core;
}
Core::Hardware::InterruptManager& System::InterruptManager() {
return *impl->interrupt_manager;
Tegra::Host1x::Host1x& System::Host1x() {
return *impl->host1x_core;
}
const Core::Hardware::InterruptManager& System::InterruptManager() const {
return *impl->interrupt_manager;
const Tegra::Host1x::Host1x& System::Host1x() const {
return *impl->host1x_core;
}
VideoCore::RendererBase& System::Renderer() {

View File

@@ -74,6 +74,9 @@ class TimeManager;
namespace Tegra {
class DebugContext;
class GPU;
namespace Host1x {
class Host1x;
} // namespace Host1x
} // namespace Tegra
namespace VideoCore {
@@ -88,10 +91,6 @@ namespace Core::Timing {
class CoreTiming;
}
namespace Core::Hardware {
class InterruptManager;
}
namespace Core::HID {
class HIDCore;
}
@@ -260,6 +259,12 @@ public:
/// Gets an immutable reference to the GPU interface.
[[nodiscard]] const Tegra::GPU& GPU() const;
/// Gets a mutable reference to the Host1x interface
[[nodiscard]] Tegra::Host1x::Host1x& Host1x();
/// Gets an immutable reference to the Host1x interface.
[[nodiscard]] const Tegra::Host1x::Host1x& Host1x() const;
/// Gets a mutable reference to the renderer.
[[nodiscard]] VideoCore::RendererBase& Renderer();
@@ -296,12 +301,6 @@ public:
/// Provides a constant reference to the core timing instance.
[[nodiscard]] const Timing::CoreTiming& CoreTiming() const;
/// Provides a reference to the interrupt manager instance.
[[nodiscard]] Core::Hardware::InterruptManager& InterruptManager();
/// Provides a constant reference to the interrupt manager instance.
[[nodiscard]] const Core::Hardware::InterruptManager& InterruptManager() const;
/// Provides a reference to the kernel instance.
[[nodiscard]] Kernel::KernelCore& Kernel();

View File

@@ -43,7 +43,7 @@ CoreTiming::CoreTiming()
CoreTiming::~CoreTiming() = default;
void CoreTiming::ThreadEntry(CoreTiming& instance) {
constexpr char name[] = "yuzu:HostTiming";
constexpr char name[] = "HostTiming";
MicroProfileOnThreadCreate(name);
Common::SetCurrentThreadName(name);
Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);

View File

@@ -189,9 +189,9 @@ void CpuManager::RunThread(std::size_t core) {
system.RegisterCoreThread(core);
std::string name;
if (is_multicore) {
name = "yuzu:CPUCore_" + std::to_string(core);
name = "CPUCore_" + std::to_string(core);
} else {
name = "yuzu:CPUThread";
name = "CPUThread";
}
MicroProfileOnThreadCreate(name.c_str());
Common::SetCurrentThreadName(name.c_str());

View File

@@ -140,7 +140,7 @@ private:
}
void ThreadLoop(std::stop_token stop_token) {
Common::SetCurrentThreadName("yuzu:Debugger");
Common::SetCurrentThreadName("Debugger");
// Set up the client signals for new data.
AsyncReceiveInto(signal_pipe, pipe_data, [&](auto d) { PipeData(d); });

View File

@@ -33,11 +33,55 @@ Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) {
return Loader::ResultStatus::ErrorBadACIHeader;
}
if (sizeof(FileAccessControl) != file->ReadObject(&acid_file_access, acid_header.fac_offset)) {
// Load acid_file_access per-component instead of the entire struct, since this struct does not
// reflect the layout of the real data.
std::size_t current_offset = acid_header.fac_offset;
if (sizeof(FileAccessControl::version) != file->ReadBytes(&acid_file_access.version,
sizeof(FileAccessControl::version),
current_offset)) {
return Loader::ResultStatus::ErrorBadFileAccessControl;
}
if (sizeof(FileAccessControl::permissions) !=
file->ReadBytes(&acid_file_access.permissions, sizeof(FileAccessControl::permissions),
current_offset += sizeof(FileAccessControl::version) + 3)) {
return Loader::ResultStatus::ErrorBadFileAccessControl;
}
if (sizeof(FileAccessControl::unknown) !=
file->ReadBytes(&acid_file_access.unknown, sizeof(FileAccessControl::unknown),
current_offset + sizeof(FileAccessControl::permissions))) {
return Loader::ResultStatus::ErrorBadFileAccessControl;
}
if (sizeof(FileAccessHeader) != file->ReadObject(&aci_file_access, aci_header.fah_offset)) {
// Load aci_file_access per-component instead of the entire struct, same as acid_file_access
current_offset = aci_header.fah_offset;
if (sizeof(FileAccessHeader::version) != file->ReadBytes(&aci_file_access.version,
sizeof(FileAccessHeader::version),
current_offset)) {
return Loader::ResultStatus::ErrorBadFileAccessHeader;
}
if (sizeof(FileAccessHeader::permissions) !=
file->ReadBytes(&aci_file_access.permissions, sizeof(FileAccessHeader::permissions),
current_offset += sizeof(FileAccessHeader::version) + 3)) {
return Loader::ResultStatus::ErrorBadFileAccessHeader;
}
if (sizeof(FileAccessHeader::unk_offset) !=
file->ReadBytes(&aci_file_access.unk_offset, sizeof(FileAccessHeader::unk_offset),
current_offset += sizeof(FileAccessHeader::permissions))) {
return Loader::ResultStatus::ErrorBadFileAccessHeader;
}
if (sizeof(FileAccessHeader::unk_size) !=
file->ReadBytes(&aci_file_access.unk_size, sizeof(FileAccessHeader::unk_size),
current_offset += sizeof(FileAccessHeader::unk_offset))) {
return Loader::ResultStatus::ErrorBadFileAccessHeader;
}
if (sizeof(FileAccessHeader::unk_offset_2) !=
file->ReadBytes(&aci_file_access.unk_offset_2, sizeof(FileAccessHeader::unk_offset_2),
current_offset += sizeof(FileAccessHeader::unk_size))) {
return Loader::ResultStatus::ErrorBadFileAccessHeader;
}
if (sizeof(FileAccessHeader::unk_size_2) !=
file->ReadBytes(&aci_file_access.unk_size_2, sizeof(FileAccessHeader::unk_size_2),
current_offset + sizeof(FileAccessHeader::unk_offset_2))) {
return Loader::ResultStatus::ErrorBadFileAccessHeader;
}
@@ -152,9 +196,7 @@ void ProgramMetadata::Print() const {
LOG_DEBUG(Service_FS, " > Is Retail: {}", acid_header.is_retail ? "YES" : "NO");
LOG_DEBUG(Service_FS, "Title ID Min: 0x{:016X}", acid_header.title_id_min);
LOG_DEBUG(Service_FS, "Title ID Max: 0x{:016X}", acid_header.title_id_max);
u64_le permissions_l; // local copy to fix alignment error
std::memcpy(&permissions_l, &acid_file_access.permissions, sizeof(permissions_l));
LOG_DEBUG(Service_FS, "Filesystem Access: 0x{:016X}\n", permissions_l);
LOG_DEBUG(Service_FS, "Filesystem Access: 0x{:016X}\n", acid_file_access.permissions);
// Begin ACI0 printing (actual perms, unsigned)
LOG_DEBUG(Service_FS, "Magic: {:.4}", aci_header.magic.data());

View File

@@ -144,20 +144,18 @@ private:
static_assert(sizeof(AciHeader) == 0x40, "ACI0 header structure size is wrong");
#pragma pack(push, 1)
// FileAccessControl and FileAccessHeader need loaded per-component: this layout does not
// reflect the real layout to avoid reference binding to misaligned addresses
struct FileAccessControl {
u8 version;
INSERT_PADDING_BYTES(3);
// 3 padding bytes
u64_le permissions;
std::array<u8, 0x20> unknown;
};
static_assert(sizeof(FileAccessControl) == 0x2C, "FS access control structure size is wrong");
struct FileAccessHeader {
u8 version;
INSERT_PADDING_BYTES(3);
// 3 padding bytes
u64_le permissions;
u32_le unk_offset;
u32_le unk_size;
@@ -165,10 +163,6 @@ private:
u32_le unk_size_2;
};
static_assert(sizeof(FileAccessHeader) == 0x1C, "FS access header structure size is wrong");
#pragma pack(pop)
Header npdm_header;
AciHeader aci_header;
AcidHeader acid_header;

View File

@@ -1,32 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hardware_interrupt_manager.h"
#include "core/hle/service/nvdrv/nvdrv_interface.h"
#include "core/hle/service/sm/sm.h"
namespace Core::Hardware {
// Registers the "GPUInterrupt" core-timing event. Its callback unpacks a syncpoint id and
// value from the 64-bit message and forwards them to the "nvdrv" service.
InterruptManager::InterruptManager(Core::System& system_in) : system(system_in) {
gpu_interrupt_event = Core::Timing::CreateEvent(
"GPUInterrupt",
[this](std::uintptr_t message, u64 time,
std::chrono::nanoseconds) -> std::optional<std::chrono::nanoseconds> {
auto nvdrv = system.ServiceManager().GetService<Service::Nvidia::NVDRV>("nvdrv");
// Message layout (see GPUInterruptSyncpt): syncpoint id in the upper 32 bits, value in
// the lower 32 bits.
const u32 syncpt = static_cast<u32>(message >> 32);
const u32 value = static_cast<u32>(message);
nvdrv->SignalGPUInterruptSyncpt(syncpt, value);
// The callback returns an empty optional.
// NOTE(review): presumably this tells core timing not to reschedule the event - confirm.
return std::nullopt;
});
}
InterruptManager::~InterruptManager() = default;
// Schedules the GPU interrupt event 10ns from now, carrying the syncpoint id and value packed
// into a single 64-bit message: id in the upper 32 bits, value in the lower 32 bits.
void InterruptManager::GPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
    const u64 id_bits = static_cast<u64>(syncpoint_id) << 32ULL;
    const u64 msg = id_bits | value;
    constexpr std::chrono::nanoseconds delay{10};
    system.CoreTiming().ScheduleEvent(delay, gpu_interrupt_event, msg);
}
} // namespace Core::Hardware

View File

@@ -1,32 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include "common/common_types.h"
namespace Core {
class System;
}
namespace Core::Timing {
struct EventType;
}
namespace Core::Hardware {
// Delivers GPU syncpoint interrupts through a core-timing event.
class InterruptManager {
public:
explicit InterruptManager(Core::System& system);
~InterruptManager();
// Queues a notification that `syncpoint_id` has reached `value`.
void GPUInterruptSyncpt(u32 syncpoint_id, u32 value);
private:
Core::System& system;
// Core-timing event used to deliver the interrupt; created in the constructor.
std::shared_ptr<Core::Timing::EventType> gpu_interrupt_event;
};
} // namespace Core::Hardware

View File

@@ -1017,9 +1017,11 @@ bool EmulatedController::SetPollingMode(Common::Input::PollingMode polling_mode)
auto& output_device = output_devices[static_cast<std::size_t>(DeviceIndex::Right)];
auto& nfc_output_device = output_devices[3];
nfc_output_device->SetPollingMode(polling_mode);
const auto virtual_nfc_result = nfc_output_device->SetPollingMode(polling_mode);
const auto mapped_nfc_result = output_device->SetPollingMode(polling_mode);
return output_device->SetPollingMode(polling_mode) == Common::Input::PollingError::None;
return virtual_nfc_result == Common::Input::PollingError::None ||
mapped_nfc_result == Common::Input::PollingError::None;
}
bool EmulatedController::SetCameraFormat(

View File

@@ -23,7 +23,7 @@ void KWorkerTask::DoWorkerTask() {
}
}
KWorkerTaskManager::KWorkerTaskManager() : m_waiting_thread(1, "yuzu:KWorkerTaskManager") {}
KWorkerTaskManager::KWorkerTaskManager() : m_waiting_thread(1, "KWorkerTaskManager") {}
void KWorkerTaskManager::AddTask(KernelCore& kernel, WorkerType type, KWorkerTask* task) {
ASSERT(type <= WorkerType::Count);

View File

@@ -48,7 +48,7 @@ namespace Kernel {
struct KernelCore::Impl {
explicit Impl(Core::System& system_, KernelCore& kernel_)
: time_manager{system_},
service_threads_manager{1, "yuzu:ServiceThreadsManager"}, system{system_} {}
service_threads_manager{1, "ServiceThreadsManager"}, system{system_} {}
void SetMulticore(bool is_multi) {
is_multicore = is_multi;

View File

@@ -36,7 +36,7 @@ ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std
: service_name{name} {
for (std::size_t i = 0; i < num_threads; ++i) {
threads.emplace_back([this, &kernel](std::stop_token stop_token) {
Common::SetCurrentThreadName(std::string{"yuzu:HleService:" + service_name}.c_str());
Common::SetCurrentThreadName(std::string{service_name}.c_str());
// Wait for first request before trying to acquire a render context
{

View File

@@ -707,7 +707,7 @@ FSP_SRV::FSP_SRV(Core::System& system_)
{31, nullptr, "OpenGameCardFileSystem"},
{32, nullptr, "ExtendSaveDataFileSystem"},
{33, nullptr, "DeleteCacheStorage"},
{34, nullptr, "GetCacheStorageSize"},
{34, &FSP_SRV::GetCacheStorageSize, "GetCacheStorageSize"},
{35, nullptr, "CreateSaveDataFileSystemByHashSalt"},
{36, nullptr, "OpenHostFileSystemWithOption"},
{51, &FSP_SRV::OpenSaveDataFileSystem, "OpenSaveDataFileSystem"},
@@ -1107,6 +1107,18 @@ void FSP_SRV::GetProgramIndexForAccessLog(Kernel::HLERequestContext& ctx) {
rb.Push(access_log_program_index);
}
// Stub: reads the requested index, logs it, and reports success with two zero s64 values.
// NOTE(review): the two values are presumably the cache storage's data and journal sizes -
// confirm against the service documentation.
void FSP_SRV::GetCacheStorageSize(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser request{ctx};
    const s32 index = request.Pop<s32>();

    LOG_WARNING(Service_FS, "(STUBBED) called with index={}", index);

    // Response size of 6 words is kept exactly as in the original stub.
    IPC::ResponseBuilder response{ctx, 6};
    response.Push(ResultSuccess);
    response.Push(s64{0});
    response.Push(s64{0});
}
class IMultiCommitManager final : public ServiceFramework<IMultiCommitManager> {
public:
explicit IMultiCommitManager(Core::System& system_)

View File

@@ -54,6 +54,7 @@ private:
void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx);
void GetProgramIndexForAccessLog(Kernel::HLERequestContext& ctx);
void OpenMultiCommitManager(Kernel::HLERequestContext& ctx);
void GetCacheStorageSize(Kernel::HLERequestContext& ctx);
FileSystemController& fsc;
const FileSys::ContentProvider& content_provider;

View File

@@ -28,7 +28,7 @@ bool IsAmiiboValid(const EncryptedNTAG215File& ntag_file) {
LOG_DEBUG(Service_NFP, "model_number=0x{0:x}",
static_cast<u16>(amiibo_data.model_info.model_number));
LOG_DEBUG(Service_NFP, "series={}", amiibo_data.model_info.series);
LOG_DEBUG(Service_NFP, "fixed_value=0x{0:x}", amiibo_data.model_info.constant_value);
LOG_DEBUG(Service_NFP, "tag_type=0x{0:x}", amiibo_data.model_info.tag_type);
LOG_DEBUG(Service_NFP, "tag_dynamic_lock=0x{0:x}", ntag_file.dynamic_lock);
LOG_DEBUG(Service_NFP, "tag_CFG0=0x{0:x}", ntag_file.CFG0);
@@ -55,7 +55,7 @@ bool IsAmiiboValid(const EncryptedNTAG215File& ntag_file) {
if (amiibo_data.constant_value != 0xA5) {
return false;
}
if (amiibo_data.model_info.constant_value != 0x02) {
if (amiibo_data.model_info.tag_type != PackedTagType::Type2) {
return false;
}
if ((ntag_file.dynamic_lock & 0xFFFFFF) != 0x0F0001U) {

View File

@@ -98,11 +98,6 @@ bool NfpDevice::LoadAmiibo(std::span<const u8> data) {
memcpy(&encrypted_tag_data, data.data(), sizeof(EncryptedNTAG215File));
if (!AmiiboCrypto::IsAmiiboValid(encrypted_tag_data)) {
LOG_INFO(Service_NFP, "Invalid amiibo");
return false;
}
device_state = DeviceState::TagFound;
deactivate_event->GetReadableEvent().Clear();
activate_event->GetWritableEvent().Signal();
@@ -148,20 +143,28 @@ void NfpDevice::Finalize() {
}
Result NfpDevice::StartDetection(s32 protocol_) {
if (device_state == DeviceState::Initialized || device_state == DeviceState::TagRemoved) {
npad_device->SetPollingMode(Common::Input::PollingMode::NFC);
device_state = DeviceState::SearchingForTag;
protocol = protocol_;
return ResultSuccess;
if (device_state != DeviceState::Initialized && device_state != DeviceState::TagRemoved) {
LOG_ERROR(Service_NFP, "Wrong device state {}", device_state);
return WrongDeviceState;
}
LOG_ERROR(Service_NFP, "Wrong device state {}", device_state);
return WrongDeviceState;
if (!npad_device->SetPollingMode(Common::Input::PollingMode::NFC)) {
LOG_ERROR(Service_NFP, "Nfc not supported");
return NfcDisabled;
}
device_state = DeviceState::SearchingForTag;
protocol = protocol_;
return ResultSuccess;
}
Result NfpDevice::StopDetection() {
npad_device->SetPollingMode(Common::Input::PollingMode::Active);
if (device_state == DeviceState::Initialized) {
return ResultSuccess;
}
if (device_state == DeviceState::TagFound || device_state == DeviceState::TagMounted) {
CloseAmiibo();
return ResultSuccess;
@@ -225,6 +228,11 @@ Result NfpDevice::Mount(MountTarget mount_target_) {
return WrongDeviceState;
}
if (!AmiiboCrypto::IsAmiiboValid(encrypted_tag_data)) {
LOG_ERROR(Service_NFP, "Not an amiibo");
return NotAnAmiibo;
}
if (!AmiiboCrypto::DecodeAmiibo(encrypted_tag_data, tag_data)) {
LOG_ERROR(Service_NFP, "Can't decode amiibo {}", device_state);
return CorruptedData;
@@ -238,6 +246,9 @@ Result NfpDevice::Mount(MountTarget mount_target_) {
Result NfpDevice::Unmount() {
if (device_state != DeviceState::TagMounted) {
LOG_ERROR(Service_NFP, "Wrong device state {}", device_state);
if (device_state == DeviceState::TagRemoved) {
return TagRemoved;
}
return WrongDeviceState;
}
@@ -256,6 +267,9 @@ Result NfpDevice::Unmount() {
Result NfpDevice::GetTagInfo(TagInfo& tag_info) const {
if (device_state != DeviceState::TagFound && device_state != DeviceState::TagMounted) {
LOG_ERROR(Service_NFP, "Wrong device state {}", device_state);
if (device_state == DeviceState::TagRemoved) {
return TagRemoved;
}
return WrongDeviceState;
}
@@ -287,12 +301,7 @@ Result NfpDevice::GetCommonInfo(CommonInfo& common_info) const {
// TODO: Validate this data
common_info = {
.last_write_date =
{
settings.write_date.GetYear(),
settings.write_date.GetMonth(),
settings.write_date.GetDay(),
},
.last_write_date = settings.write_date.GetWriteDate(),
.write_counter = tag_data.write_counter,
.version = 0,
.application_area_size = sizeof(ApplicationArea),
@@ -303,6 +312,9 @@ Result NfpDevice::GetCommonInfo(CommonInfo& common_info) const {
Result NfpDevice::GetModelInfo(ModelInfo& model_info) const {
if (device_state != DeviceState::TagMounted) {
LOG_ERROR(Service_NFP, "Wrong device state {}", device_state);
if (device_state == DeviceState::TagRemoved) {
return TagRemoved;
}
return WrongDeviceState;
}
@@ -341,12 +353,7 @@ Result NfpDevice::GetRegisterInfo(RegisterInfo& register_info) const {
// TODO: Validate this data
register_info = {
.mii_char_info = manager.ConvertV3ToCharInfo(tag_data.owner_mii),
.creation_date =
{
settings.init_date.GetYear(),
settings.init_date.GetMonth(),
settings.init_date.GetDay(),
},
.creation_date = settings.init_date.GetWriteDate(),
.amiibo_name = GetAmiiboName(settings),
.font_region = {},
};
@@ -478,8 +485,7 @@ Result NfpDevice::GetApplicationArea(std::vector<u8>& data) const {
}
if (data.size() > sizeof(ApplicationArea)) {
LOG_ERROR(Service_NFP, "Wrong data size {}", data.size());
return ResultUnknown;
data.resize(sizeof(ApplicationArea));
}
memcpy(data.data(), tag_data.application_area.data(), data.size());
@@ -518,7 +524,7 @@ Result NfpDevice::SetApplicationArea(std::span<const u8> data) {
Common::TinyMT rng{};
std::memcpy(tag_data.application_area.data(), data.data(), data.size());
// HW seems to fill excess data with garbage
// Fill remaining data with random numbers
rng.GenerateRandomBytes(tag_data.application_area.data() + data.size(),
sizeof(ApplicationArea) - data.size());
@@ -561,12 +567,12 @@ Result NfpDevice::RecreateApplicationArea(u32 access_id, std::span<const u8> dat
if (data.size() > sizeof(ApplicationArea)) {
LOG_ERROR(Service_NFP, "Wrong data size {}", data.size());
return ResultUnknown;
return WrongApplicationAreaSize;
}
Common::TinyMT rng{};
std::memcpy(tag_data.application_area.data(), data.data(), data.size());
// HW seems to fill excess data with garbage
// Fill remaining data with random numbers
rng.GenerateRandomBytes(tag_data.application_area.data() + data.size(),
sizeof(ApplicationArea) - data.size());
@@ -612,7 +618,6 @@ u64 NfpDevice::GetHandle() const {
}
u32 NfpDevice::GetApplicationAreaSize() const {
// Investigate if this value is really constant
return sizeof(ApplicationArea);
}

View File

@@ -8,6 +8,8 @@
namespace Service::NFP {
constexpr Result DeviceNotFound(ErrorModule::NFP, 64);
constexpr Result InvalidArgument(ErrorModule::NFP, 65);
constexpr Result WrongApplicationAreaSize(ErrorModule::NFP, 68);
constexpr Result WrongDeviceState(ErrorModule::NFP, 73);
constexpr Result NfcDisabled(ErrorModule::NFP, 80);
constexpr Result WriteAmiiboFailed(ErrorModule::NFP, 88);

View File

@@ -84,6 +84,15 @@ enum class TagType : u32 {
Type5, // ISO15693 RW/RO 540 bytes 106kbit/s
};
// One-byte counterpart of TagType with the same enumerators in the same order. It is the type
// of AmiiboModelInfo::tag_type, where amiibo validation expects Type2 (value 2).
enum class PackedTagType : u8 {
None,
Type1, // ISO14443A RW 96-2k bytes 106kbit/s
Type2, // ISO14443A RW/RO 540 bytes 106kbit/s
Type3, // Sony Felica RW/RO 2k bytes 212kbit/s
Type4, // ISO14443A RW/RO 4k-32k bytes 424kbit/s
Type5, // ISO15693 RW/RO 540 bytes 106kbit/s
};
enum class TagProtocol : u32 {
None,
TypeA, // ISO14443A
@@ -104,6 +113,13 @@ struct TagUuid {
};
static_assert(sizeof(TagUuid) == 10, "TagUuid is an invalid size");
// Unpacked calendar date as reported in CommonInfo::last_write_date and
// RegisterInfo::creation_date. Defined ahead of AmiiboDate so that AmiiboDate::GetWriteDate()
// can return it.
struct WriteDate {
u16 year;
u8 month;
u8 day;
};
static_assert(sizeof(WriteDate) == 0x4, "WriteDate is an invalid size");
struct AmiiboDate {
u16 raw_date{};
@@ -121,6 +137,21 @@ struct AmiiboDate {
return static_cast<u8>(GetValue() & 0x001F);
}
// Returns the stored date in unpacked form, or 2000-01-01 when the stored date does not pass
// IsValidDate().
WriteDate GetWriteDate() const {
    if (IsValidDate()) {
        return {
            .year = GetYear(),
            .month = GetMonth(),
            .day = GetDay(),
        };
    }
    // Fallback for invalid dates.
    return {
        .year = 2000,
        .month = 1,
        .day = 1,
    };
}
void SetYear(u16 year) {
const u16 year_converted = static_cast<u16>((year - 2000) << 9);
raw_date = Common::swap16((GetValue() & ~0xFE00) | year_converted);
@@ -133,6 +164,13 @@ struct AmiiboDate {
const u16 day_converted = static_cast<u16>(day);
raw_date = Common::swap16((GetValue() & ~0x001F) | day_converted);
}
// A date is valid when the year is 2000 or later, the month is within 1-12 and the day is
// within 1-31. The day is not checked against the length of the month.
bool IsValidDate() const {
    const bool year_in_range = GetYear() >= 2000;
    const bool month_in_range = GetMonth() > 0 && GetMonth() < 13;
    const bool day_in_range = GetDay() > 0 && GetDay() < 32;
    return year_in_range && month_in_range && day_in_range;
}
};
static_assert(sizeof(AmiiboDate) == 2, "AmiiboDate is an invalid size");
@@ -163,7 +201,7 @@ struct AmiiboModelInfo {
AmiiboType amiibo_type;
u16_be model_number;
AmiiboSeries series;
u8 constant_value; // Must be 02
PackedTagType tag_type;
INSERT_PADDING_BYTES(0x4); // Unknown
};
static_assert(sizeof(AmiiboModelInfo) == 0xC, "AmiiboModelInfo is an invalid size");
@@ -250,13 +288,6 @@ struct TagInfo {
};
static_assert(sizeof(TagInfo) == 0x58, "TagInfo is an invalid size");
struct WriteDate {
u16 year;
u8 month;
u8 day;
};
static_assert(sizeof(WriteDate) == 0x4, "WriteDate is an invalid size");
struct CommonInfo {
WriteDate last_write_date;
u16 write_counter;

View File

@@ -93,6 +93,18 @@ void IUser::ListDevices(Kernel::HLERequestContext& ctx) {
return;
}
if (!ctx.CanWriteBuffer()) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(InvalidArgument);
return;
}
if (ctx.GetWriteBufferSize() == 0) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(InvalidArgument);
return;
}
std::vector<u64> nfp_devices;
const std::size_t max_allowed_devices = ctx.GetWriteBufferSize() / sizeof(u64);
@@ -255,6 +267,12 @@ void IUser::GetApplicationArea(Kernel::HLERequestContext& ctx) {
return;
}
if (!ctx.CanWriteBuffer()) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(InvalidArgument);
return;
}
auto device = GetNfpDevice(device_handle);
if (!device.has_value()) {
@@ -283,6 +301,12 @@ void IUser::SetApplicationArea(Kernel::HLERequestContext& ctx) {
return;
}
if (!ctx.CanReadBuffer()) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(InvalidArgument);
return;
}
auto device = GetNfpDevice(device_handle);
if (!device.has_value()) {
@@ -358,6 +382,12 @@ void IUser::CreateApplicationArea(Kernel::HLERequestContext& ctx) {
return;
}
if (!ctx.CanReadBuffer()) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(InvalidArgument);
return;
}
auto device = GetNfpDevice(device_handle);
if (!device.has_value()) {

View File

@@ -0,0 +1,50 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
#include "video_core/host1x/host1x.h"
namespace Service::Nvidia::NvCore {
// Private state behind Container: the nvmap instance, the syncpoint manager (both constructed
// from the same Host1x reference) and the Host1x device-file bookkeeping.
struct ContainerImpl {
explicit ContainerImpl(Tegra::Host1x::Host1x& host1x_)
: file{host1x_}, manager{host1x_}, device_file_data{} {}
NvMap file;
SyncpointManager manager;
Container::Host1xDeviceFileData device_file_data;
};
Container::Container(Tegra::Host1x::Host1x& host1x_) {
impl = std::make_unique<ContainerImpl>(host1x_);
}
// Defaulted here rather than in the header: the header only forward-declares ContainerImpl,
// while this file has the complete type needed to destroy the unique_ptr member.
Container::~Container() = default;
// Accessors for the nvmap instance owned by the container.
NvMap& Container::GetNvMapFile() {
return impl->file;
}
const NvMap& Container::GetNvMapFile() const {
return impl->file;
}
// Accessors for the Host1x device-file bookkeeping data.
Container::Host1xDeviceFileData& Container::Host1xDeviceFile() {
return impl->device_file_data;
}
const Container::Host1xDeviceFileData& Container::Host1xDeviceFile() const {
return impl->device_file_data;
}
// Accessors for the syncpoint manager owned by the container.
SyncpointManager& Container::GetSyncpointManager() {
return impl->manager;
}
const SyncpointManager& Container::GetSyncpointManager() const {
return impl->manager;
}
} // namespace Service::Nvidia::NvCore

View File

@@ -0,0 +1,52 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <deque>
#include <memory>
#include <unordered_map>
#include "core/hle/service/nvdrv/nvdata.h"
namespace Tegra::Host1x {
class Host1x;
} // namespace Tegra::Host1x
namespace Service::Nvidia::NvCore {
class NvMap;
class SyncpointManager;
struct ContainerImpl;
// Owns the nvdrv core objects - the nvmap instance, the syncpoint manager and the Host1x
// device-file data - behind a pointer to an implementation struct defined in the .cpp file.
class Container {
public:
explicit Container(Tegra::Host1x::Host1x& host1x);
~Container();
NvMap& GetNvMapFile();
const NvMap& GetNvMapFile() const;
SyncpointManager& GetSyncpointManager();
const SyncpointManager& GetSyncpointManager() const;
// Bookkeeping for Host1x device files.
// NOTE(review): field meanings below are inferred from their names - confirm against the
// nvdec/vic device code that uses them.
struct Host1xDeviceFileData {
std::unordered_map<DeviceFD, u32> fd_to_id{}; // presumably device fd -> assigned id
std::deque<u32> syncpts_accumulated{};
u32 nvdec_next_id{};
u32 vic_next_id{};
};
Host1xDeviceFileData& Host1xDeviceFile();
const Host1xDeviceFileData& Host1xDeviceFile() const;
private:
std::unique_ptr<ContainerImpl> impl;
};
} // namespace Service::Nvidia::NvCore

View File

@@ -0,0 +1,272 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/memory.h"
#include "video_core/host1x/host1x.h"
using Core::Memory::YUZU_PAGESIZE;
namespace Service::Nvidia::NvCore {
NvMap::Handle::Handle(u64 size_, Id id_)
: size(size_), aligned_size(size), orig_size(size), id(id_) {
flags.raw = 0;
}
// Records the memory configuration of the handle and marks it as allocated.
// Returns AccessDenied if the handle was already allocated, Success otherwise.
NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) {
    std::scoped_lock guard(mutex);

    // A handle can only ever be allocated once
    if (allocated) {
        return NvResult::AccessDenied;
    }

    flags = pFlags;
    kind = pKind;

    // The alignment is never allowed to be smaller than a page
    if (pAlign < YUZU_PAGESIZE) {
        align = YUZU_PAGESIZE;
    } else {
        align = pAlign;
    }

    if (pAddress == 0) {
        LOG_CRITICAL(Service_NVDRV,
                     "Mapping nvmap handles without a CPU side address is unimplemented!");
    } else {
        // keep_uncached_after_free is cleared whenever a CPU side address was supplied
        flags.keep_uncached_after_free.Assign(0);
    }

    // `size` is rounded up to a page first, `aligned_size` is derived from the rounded value
    size = Common::AlignUp(size, YUZU_PAGESIZE);
    aligned_size = Common::AlignUp(size, align);

    address = pAddress;
    allocated = true;
    return NvResult::Success;
}
// Adds one reference to the handle, counted separately for internal and guest sessions.
// Returns BadValue for a handle that has not been allocated yet, Success otherwise.
NvResult NvMap::Handle::Duplicate(bool internal_session) {
    std::scoped_lock guard(mutex);

    // Unallocated handles cannot be duplicated as duplication requires memory accounting (in HOS)
    if (!allocated) [[unlikely]] {
        return NvResult::BadValue;
    }

    // Internal references are tracked on their own counter: going through FromId internally would
    // otherwise skew the guest-visible count, since per-process handle refs are not implemented.
    s32& reference_count = internal_session ? internal_dupes : dupes;
    ++reference_count;

    return NvResult::Success;
}
// Stores a reference to the Host1x instance; its allocator and memory manager are used when
// pinning and unmapping handles.
NvMap::NvMap(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {}
void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) {
std::scoped_lock lock(handles_lock);
handles.emplace(handle_description->id, std::move(handle_description));
}
// Drops the handle's pending unmap-queue entry (if any), then unmaps and frees its SMMU region
// and clears the recorded SMMU address.
void NvMap::UnmapHandle(Handle& handle_description) {
    // Remove pending unmap queue entry if needed
    if (auto& queue_entry = handle_description.unmap_queue_entry; queue_entry) {
        unmap_queue.erase(*queue_entry);
        queue_entry.reset();
    }

    // Free and unmap the handle from the SMMU
    const auto smmu_address{handle_description.pin_virt_address};
    const auto mapped_size{handle_description.aligned_size};
    host1x.MemoryManager().Unmap(static_cast<GPUVAddr>(smmu_address), mapped_size);
    host1x.Allocator().Free(smmu_address, static_cast<u32>(mapped_size));

    handle_description.pin_virt_address = 0;
}
// Erases the handle from the owning map once neither guest nor internal references remain.
// Returns true when the handle had no references left, false otherwise.
bool NvMap::TryRemoveHandle(const Handle& handle_description) {
    const bool still_referenced =
        handle_description.dupes != 0 || handle_description.internal_dupes != 0;
    if (still_referenced) {
        return false;
    }

    // No dupes left, we can remove from handle map (a missing entry is simply ignored)
    std::scoped_lock guard(handles_lock);
    handles.erase(handle_description.id);
    return true;
}
// Creates an unallocated handle of `size` bytes, registers it and returns it via `result_out`.
// Returns BadValue for a zero size (leaving `result_out` untouched), Success otherwise.
NvResult NvMap::CreateHandle(u64 size, std::shared_ptr<NvMap::Handle>& result_out) {
    if (size == 0) [[unlikely]] {
        return NvResult::BadValue;
    }

    // IDs are handed out from an atomic counter in steps of HandleIdIncrement
    const u32 new_id{next_handle_id.fetch_add(HandleIdIncrement, std::memory_order_relaxed)};
    auto created = std::make_shared<Handle>(size, new_id);
    AddHandle(created);

    result_out = std::move(created);
    return NvResult::Success;
}
// Looks up a handle by ID.
// Returns the registered handle, or nullptr when no handle with that ID exists.
std::shared_ptr<NvMap::Handle> NvMap::GetHandle(Handle::Id handle) {
    std::scoped_lock lock(handles_lock);

    // An unknown ID is an ordinary outcome here, so use find() rather than paying for an
    // exception on every miss.
    const auto it{handles.find(handle)};
    if (it == handles.end()) {
        return nullptr;
    }
    return it->second;
}
// Looks up the `address` field of a handle by ID.
// Returns 0 when no handle with that ID exists.
VAddr NvMap::GetHandleAddress(Handle::Id handle) {
    std::scoped_lock lock(handles_lock);

    // An unknown ID is an ordinary outcome here, so use find() rather than paying for an
    // exception on every miss.
    const auto it{handles.find(handle)};
    if (it == handles.end()) {
        return 0;
    }
    return it->second->address;
}
// Pins a handle into the SMMU address space and returns the SMMU address it is mapped at.
// The first pin either revives a mapping that is still waiting in the unmap queue or allocates
// and maps a fresh region, evicting queued (unpinned) handles while the allocator is full.
// Returns 0 if the handle is not registered, or if the SMMU address space is exhausted and
// nothing is left to evict.
u32 NvMap::PinHandle(NvMap::Handle::Id handle) {
    auto handle_description{GetHandle(handle)};
    if (!handle_description) [[unlikely]] {
        return 0;
    }

    std::scoped_lock lock(handle_description->mutex);
    if (!handle_description->pins) {
        // If we're in the unmap queue we can just remove ourselves and return since we're already
        // mapped
        {
            // Lock now to prevent our queue entry from being removed for allocation in-between the
            // following check and erase
            std::scoped_lock queueLock(unmap_queue_lock);
            if (handle_description->unmap_queue_entry) {
                unmap_queue.erase(*handle_description->unmap_queue_entry);
                handle_description->unmap_queue_entry.reset();

                handle_description->pins++;
                return handle_description->pin_virt_address;
            }
        }

        // If not then allocate some space and map it
        u32 address{};
        auto& smmu_allocator = host1x.Allocator();
        auto& smmu_memory_manager = host1x.MemoryManager();
        while (!(address =
                     smmu_allocator.Allocate(static_cast<u32>(handle_description->aligned_size)))) {
            // Free handles until the allocation succeeds
            std::scoped_lock queueLock(unmap_queue_lock);

            // front() on an empty list is undefined behaviour, and with nothing left to evict
            // the allocation can never succeed: report the failure instead of spinning
            if (unmap_queue.empty()) {
                LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");
                return 0;
            }

            auto freeHandleDesc{unmap_queue.front()};
            if (!freeHandleDesc) {
                // Defensive: discard an empty entry so the loop always makes progress
                unmap_queue.pop_front();
                continue;
            }

            // Handles in the unmap queue are guaranteed not to be pinned so don't bother
            // checking if they are before unmapping
            std::scoped_lock freeLock(freeHandleDesc->mutex);
            if (freeHandleDesc->pin_virt_address) {
                // Unmapping also removes the handle's entry from the queue
                UnmapHandle(*freeHandleDesc);
            } else {
                // Nothing is mapped for this entry; just drop it from the queue
                unmap_queue.pop_front();
                freeHandleDesc->unmap_queue_entry.reset();
            }
        }

        smmu_memory_manager.Map(static_cast<GPUVAddr>(address), handle_description->address,
                                handle_description->aligned_size);
        handle_description->pin_virt_address = address;
    }

    handle_description->pins++;
    return handle_description->pin_virt_address;
}
// Drops one pin from the handle. When the pin count reaches zero the handle is appended to the
// unmap queue and remembers its queue position; its mapping is left in place. Unknown handle IDs
// are ignored.
void NvMap::UnpinHandle(Handle::Id handle) {
    const auto handle_description{GetHandle(handle)};
    if (!handle_description) {
        return;
    }

    std::scoped_lock guard(handle_description->mutex);

    const s32 remaining_pins{--handle_description->pins};
    if (remaining_pins < 0) {
        LOG_WARNING(Service_NVDRV, "Pin count imbalance detected!");
        return;
    }
    if (remaining_pins != 0) {
        return;
    }

    // Add to the unmap queue allowing this handle's memory to be freed if needed
    std::scoped_lock queue_guard(unmap_queue_lock);
    unmap_queue.push_back(handle_description);
    handle_description->unmap_queue_entry = std::prev(unmap_queue.end());
}
// Adds a reference to a registered handle on behalf of the given session kind. Failures (unknown
// ID, or the handle refusing the duplication) are only logged.
void NvMap::DuplicateHandle(Handle::Id handle, bool internal_session) {
    const auto handle_description{GetHandle(handle)};
    if (handle_description == nullptr) {
        LOG_CRITICAL(Service_NVDRV, "Unregistered handle!");
        return;
    }

    if (handle_description->Duplicate(internal_session) != NvResult::Success) {
        LOG_CRITICAL(Service_NVDRV, "Could not duplicate handle!");
    }
}
// Removes one reference (guest or internal) from a handle and erases it from the handle map once
// no references remain. Returns std::nullopt for an unknown ID; otherwise returns the handle's
// address/size/uncached state, with `address` forced to 0 when the handle object is still alive
// after this call.
std::optional<NvMap::FreeInfo> NvMap::FreeHandle(Handle::Id handle, bool internal_session) {
    // We use a weak ptr here so we can tell when the handle has been freed and report that back to
    // guest
    std::weak_ptr<Handle> handle_weak{GetHandle(handle)};
    FreeInfo free_info;

    {
        // The strong reference must not outlive this scope, otherwise the expiry check below
        // would always see the handle as still alive
        const auto handle_description{handle_weak.lock()};
        if (!handle_description) {
            return std::nullopt;
        }

        std::scoped_lock guard(handle_description->mutex);

        if (internal_session) {
            if (--handle_description->internal_dupes < 0) {
                LOG_WARNING(Service_NVDRV, "Internal duplicate count imbalance detected!");
            }
        } else {
            const s32 remaining_dupes{--handle_description->dupes};
            if (remaining_dupes < 0) {
                LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!");
            } else if (remaining_dupes == 0) {
                // Force unmap the handle
                if (handle_description->pin_virt_address) {
                    std::scoped_lock queue_guard(unmap_queue_lock);
                    UnmapHandle(*handle_description);
                }

                handle_description->pins = 0;
            }
        }

        // Try to remove the shared ptr to the handle from the map, if nothing else is using the
        // handle then it will now be freed when `handle_description` goes out of scope
        const bool removed{TryRemoveHandle(*handle_description)};
        if (removed) {
            LOG_DEBUG(Service_NVDRV, "Removed nvmap handle: {}", handle);
        } else {
            LOG_DEBUG(Service_NVDRV,
                      "Tried to free nvmap handle: {} but didn't as it still has duplicates",
                      handle);
        }

        free_info = {
            .address = handle_description->address,
            .size = handle_description->size,
            .was_uncached = handle_description->flags.map_uncached.Value() != 0,
        };
    }

    // Handle hasn't been freed from memory, set address to 0 to mark that the handle wasn't freed
    if (!handle_weak.expired()) {
        LOG_DEBUG(Service_NVDRV, "nvmap handle: {} wasn't freed as it is still in use", handle);
        free_info.address = 0;
    }

    return free_info;
}
} // namespace Service::Nvidia::NvCore

View File

@@ -0,0 +1,175 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <atomic>
#include <list>
#include <memory>
#include <mutex>
#include <optional>
#include <unordered_map>
#include <assert.h>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "core/hle/service/nvdrv/nvdata.h"
namespace Tegra {
namespace Host1x {
class Host1x;
} // namespace Host1x
} // namespace Tegra
namespace Service::Nvidia::NvCore {
/**
* @brief The nvmap core class holds the global state for nvmap and provides methods to manage
* handles
*/
class NvMap {
public:
/**
* @brief A handle to a contiguous block of memory in an application's address space
*/
struct Handle {
std::mutex mutex; //!< Guards the mutable state of this handle
u64 align{}; //!< The alignment to use when pinning the handle onto the SMMU
u64 size; //!< Page-aligned size of the memory the handle refers to
u64 aligned_size; //!< `align`-aligned size of the memory the handle refers to
u64 orig_size; //!< Original unaligned size of the memory this handle refers to
s32 dupes{1}; //!< How many guest references there are to this handle
s32 internal_dupes{0}; //!< How many emulator-internal references there are to this handle
using Id = u32;
Id id; //!< A globally unique identifier for this handle
s32 pins{}; //!< Outstanding `PinHandle` calls not yet matched by `UnpinHandle`
u32 pin_virt_address{}; //!< SMMU address the handle is mapped at, 0 while it is not mapped
//! Position of this handle in NvMap's unmap queue, empty while it is not queued
std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{};
union Flags {
u32 raw;
BitField<0, 1, u32> map_uncached; //!< If the handle should be mapped as uncached
BitField<2, 1, u32> keep_uncached_after_free; //!< Only applicable when the handle was
//!< allocated with a fixed address
BitField<4, 1, u32> _unk0_; //!< Passed to IOVMM for pins
} flags{};
static_assert(sizeof(Flags) == sizeof(u32));
u64 address{}; //!< The memory location in the guest's AS that this handle corresponds to,
//!< this can also be in the nvdrv tmem
bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC
//!< call
u8 kind{}; //!< Used for memory compression
bool allocated{}; //!< If the handle has been allocated with `Alloc`
u64 dma_map_addr{}; //! remove me after implementing pinning.
/// Creates an unallocated handle with the given size and ID
Handle(u64 size, Id id);
/**
* @brief Sets up the handle with the given memory config, can allocate memory from the tmem
* if a 0 address is passed
*/
[[nodiscard]] NvResult Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress);
/**
* @brief Increases the dupe counter of the handle for the given session
*/
[[nodiscard]] NvResult Duplicate(bool internal_session);
/**
* @brief Obtains a pointer to the handle's memory and marks the handle as having been
* mapped
* @return `address` reinterpreted as a pointer, or nullptr while `address` is 0 (in which
* case the handle is not marked as mapped)
*/
u8* GetPointer() {
if (!address) {
return nullptr;
}
is_shared_mem_mapped = true;
return reinterpret_cast<u8*>(address);
}
};
/**
* @brief Encapsulates the result of a FreeHandle operation
*/
struct FreeInfo {
u64 address; //!< Address the handle referred to before deletion
u64 size; //!< Page-aligned handle size
bool was_uncached; //!< If the handle was allocated as uncached
};
explicit NvMap(Tegra::Host1x::Host1x& host1x);
/**
* @brief Creates an unallocated handle of the given size
*/
[[nodiscard]] NvResult CreateHandle(u64 size, std::shared_ptr<NvMap::Handle>& result_out);
/**
* @brief Looks up a registered handle by its ID
*/
std::shared_ptr<Handle> GetHandle(Handle::Id handle);
/**
* @brief Looks up the `address` field of a registered handle by its ID
*/
VAddr GetHandleAddress(Handle::Id handle);
/**
* @brief Maps a handle into the SMMU address space
* @note This operation is refcounted, the number of calls to this must eventually match the
* number of calls to `UnpinHandle`
* @return The SMMU virtual address that the handle has been mapped to
*/
u32 PinHandle(Handle::Id handle);
/**
* @brief When this has been called an equal number of times to `PinHandle` for the supplied
* handle it will be added to a list of handles to be freed when necessary
*/
void UnpinHandle(Handle::Id handle);
/**
* @brief Tries to duplicate a handle
*/
void DuplicateHandle(Handle::Id handle, bool internal_session = false);
/**
* @brief Tries to free a handle and remove a single dupe
* @note If a handle has no dupes left and has no other users a FreeInfo struct will be returned
* describing the prior state of the handle
*/
std::optional<FreeInfo> FreeHandle(Handle::Id handle, bool internal_session);
private:
//! Handles whose pin count dropped to zero; their mappings may be released when space is needed
std::list<std::shared_ptr<Handle>> unmap_queue{};
std::mutex unmap_queue_lock{}; //!< Protects access to `unmap_queue`
std::unordered_map<Handle::Id, std::shared_ptr<Handle>>
handles{}; //!< Main owning map of handles
std::mutex handles_lock; //!< Protects access to `handles`
static constexpr u32 HandleIdIncrement{
4}; //!< Each new handle ID is an increment of 4 from the previous
std::atomic<u32> next_handle_id{HandleIdIncrement}; //!< ID the next created handle receives
Tegra::Host1x::Host1x& host1x; //!< Provides the SMMU allocator and memory manager
/// Inserts the handle into `handles`, keyed by its ID
void AddHandle(std::shared_ptr<Handle> handle);
/**
* @brief Unmaps and frees the SMMU memory region a handle is mapped to
* @note Both `unmap_queue_lock` and `handle_description.mutex` MUST be locked when calling this
*/
void UnmapHandle(Handle& handle_description);
/**
* @brief Removes a handle from the map taking its dupes into account
* @note handle_description.mutex MUST be locked when calling this
* @return If the handle was removed from the map
*/
bool TryRemoveHandle(const Handle& handle_description);
};
} // namespace Service::Nvidia::NvCore

View File

@@ -0,0 +1,121 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#include "common/assert.h"
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
#include "video_core/host1x/host1x.h"
namespace Service::Nvidia::NvCore {
// Reserves the syncpoints with fixed assignments up front: the two vblank syncpoints (client
// managed) and every non-zero entry of `channel_syncpoints` (not client managed).
SyncpointManager::SyncpointManager(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {
    constexpr u32 VBlank0SyncpointId{26};
    constexpr u32 VBlank1SyncpointId{27};

    // Reserve both vblank syncpoints as client managed as they use Continuous Mode
    // Refer to section 14.3.5.3 of the TRM for more information on Continuous Mode
    // https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/8f74a72394efb871cb3f886a3de2998cd7ff2990/drivers/gpu/host1x/drm/dc.c#L660
    ReserveSyncpoint(VBlank0SyncpointId, true);
    ReserveSyncpoint(VBlank1SyncpointId, true);

    // A zero entry means the channel has no fixed syncpoint, so there is nothing to reserve
    for (const u32 channel_syncpoint : channel_syncpoints) {
        if (channel_syncpoint == 0) {
            continue;
        }
        ReserveSyncpoint(channel_syncpoint, false);
    }
}

SyncpointManager::~SyncpointManager() = default;
// Marks syncpoint `id` as reserved and records whether it is client managed.
// Returns `id`, or 0 (after asserting) if the syncpoint was already reserved.
// Out-of-range IDs throw via std::array::at.
u32 SyncpointManager::ReserveSyncpoint(u32 id, bool client_managed) {
    auto& syncpoint{syncpoints.at(id)};
    if (syncpoint.reserved) {
        ASSERT_MSG(false, "Requested syncpoint is in use");
        return 0;
    }

    syncpoint.reserved = true;
    syncpoint.interface_managed = client_managed;
    return id;
}
// Scans upwards from ID 1 for the first syncpoint that is not reserved.
// Returns its ID, or 0 (after asserting) when every syncpoint from 1 onwards is reserved.
u32 SyncpointManager::FindFreeSyncpoint() {
    u32 candidate{1};
    while (candidate < syncpoints.size()) {
        if (!syncpoints[candidate].reserved) {
            return candidate;
        }
        candidate++;
    }

    ASSERT_MSG(false, "Failed to find a free syncpoint!");
    return 0;
}
// Finds the first free syncpoint and reserves it while holding `reservation_lock`.
// Returns whatever ReserveSyncpoint returns for the ID that was found.
u32 SyncpointManager::AllocateSyncpoint(bool client_managed) {
    std::lock_guard lock(reservation_lock);
    const u32 free_id{FindFreeSyncpoint()};
    return ReserveSyncpoint(free_id, client_managed);
}
// Clears the reserved flag of syncpoint `id` while holding `reservation_lock`; asserts that the
// syncpoint was reserved beforehand. Out-of-range IDs throw via std::array::at.
void SyncpointManager::FreeSyncpoint(u32 id) {
    std::lock_guard lock(reservation_lock);
    auto& syncpoint{syncpoints.at(id)};
    ASSERT(syncpoint.reserved);
    syncpoint.reserved = false;
}
// Returns true when `id` names an existing syncpoint that is currently reserved.
bool SyncpointManager::IsSyncpointAllocated(u32 id) {
    // Valid indices are 0 .. SyncpointCount - 1. The previous `<=` comparison let
    // id == SyncpointCount through and indexed one element past the end of the array.
    return (id < SyncpointCount) && syncpoints[id].reserved;
}
// Reports whether syncpoint `id` has reached `threshold`, judged from the cached counters.
// Returns false (after asserting) for a syncpoint that is not reserved.
// Out-of-range IDs throw via std::array::at.
bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) const {
    const SyncpointInfo& syncpoint{syncpoints.at(id)};

    if (!syncpoint.reserved) {
        ASSERT(false);
        return false;
    }

    // Tracked syncpoints: compare the wrapped distances of both counters from the threshold
    if (!syncpoint.interface_managed) {
        return (syncpoint.counter_max - threshold) >= (syncpoint.counter_min - threshold);
    }

    // If the interface manages counters then we don't keep track of the maximum value as it
    // handles sanity checking the values then
    return static_cast<s32>(syncpoint.counter_min - threshold) >= 0;
}
// Adds `amount` to the maximum counter of syncpoint `id` and returns the new maximum.
// Returns 0 (after asserting) for a syncpoint that is not reserved.
u32 SyncpointManager::IncrementSyncpointMaxExt(u32 id, u32 amount) {
    auto& syncpoint{syncpoints.at(id)};
    if (!syncpoint.reserved) {
        ASSERT(false);
        return 0;
    }

    return syncpoint.counter_max += amount;
}
// Returns the cached minimum counter of syncpoint `id`.
// Returns 0 (after asserting) for a syncpoint that is not reserved.
u32 SyncpointManager::ReadSyncpointMinValue(u32 id) {
    auto& syncpoint{syncpoints.at(id)};
    if (!syncpoint.reserved) {
        ASSERT(false);
        return 0;
    }

    return syncpoint.counter_min;
}
// Refreshes the cached minimum counter of syncpoint `id` from the Host1x syncpoint manager and
// returns the cached value. Returns 0 (after asserting) for a syncpoint that is not reserved.
u32 SyncpointManager::UpdateMin(u32 id) {
    auto& syncpoint{syncpoints.at(id)};
    if (!syncpoint.reserved) {
        ASSERT(false);
        return 0;
    }

    syncpoint.counter_min = host1x.GetSyncpointManager().GetHostSyncpointValue(id);
    return syncpoint.counter_min;
}
// Builds a fence from syncpoint `id` and its current maximum counter.
// Returns a default-constructed fence (after asserting) for a syncpoint that is not reserved.
NvFence SyncpointManager::GetSyncpointFence(u32 id) {
    auto& syncpoint{syncpoints.at(id)};
    if (!syncpoint.reserved) {
        ASSERT(false);
        return NvFence{};
    }

    return {.id = static_cast<s32>(id), .value = syncpoint.counter_max};
}
} // namespace Service::Nvidia::NvCore

View File

@@ -0,0 +1,134 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <array>
#include <atomic>
#include <mutex>
#include "common/common_types.h"
#include "core/hle/service/nvdrv/nvdata.h"
namespace Tegra::Host1x {
class Host1x;
} // namespace Tegra::Host1x
namespace Service::Nvidia::NvCore {
/**
* @brief Identifies a channel; the numeric values line up with the entries of
* `SyncpointManager::channel_syncpoints`, which is sized by `Max`
*/
enum class ChannelType : u32 {
MsEnc = 0,
VIC = 1,
GPU = 2,
NvDec = 3,
Display = 4,
NvJpg = 5,
TSec = 6,
Max = 7 //!< Number of channel types, not a channel itself
};
/**
* @brief SyncpointManager handles allocating and accessing host1x syncpoints, these are cached
* versions of the HW syncpoints which are intermittently synced
* @note Refer to Chapter 14 of the Tegra X1 TRM for an exhaustive overview of them
* @url https://http.download.nvidia.com/tegra-public-appnotes/host1x.html
* @url
* https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/jetson-tx1/drivers/video/tegra/host/nvhost_syncpt.c
*/
class SyncpointManager final {
public:
/// Reserves the vblank syncpoints and the fixed per-channel syncpoints
explicit SyncpointManager(Tegra::Host1x::Host1x& host1x);
~SyncpointManager();
/**
* @brief Checks if the given syncpoint is both allocated and below the number of HW syncpoints
*/
bool IsSyncpointAllocated(u32 id);
/**
* @brief Finds a free syncpoint and reserves it
* @return The ID of the reserved syncpoint
*/
u32 AllocateSyncpoint(bool client_managed);
/**
* @brief Checks whether the syncpoint has reached the given threshold, based on the cached
* counter values
* @url
* https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/8f74a72394efb871cb3f886a3de2998cd7ff2990/drivers/gpu/host1x/syncpt.c#L259
*/
bool HasSyncpointExpired(u32 id, u32 threshold) const;
/**
* @brief Convenience wrapper that forwards the fence's ID and value to `HasSyncpointExpired`
*/
bool IsFenceSignalled(NvFence fence) const {
return HasSyncpointExpired(fence.id, fence.value);
}
/**
* @brief Atomically increments the maximum value of a syncpoint by the given amount
* @return The new max value of the syncpoint
*/
u32 IncrementSyncpointMaxExt(u32 id, u32 amount);
/**
* @return The minimum value of the syncpoint
*/
u32 ReadSyncpointMinValue(u32 id);
/**
* @brief Synchronises the minimum value of the syncpoint with the GPU
* @return The new minimum value of the syncpoint
*/
u32 UpdateMin(u32 id);
/**
* @brief Frees the usage of a syncpoint.
*/
void FreeSyncpoint(u32 id);
/**
* @return A fence that will be signalled once this syncpoint hits its maximum value
*/
NvFence GetSyncpointFence(u32 id);
static constexpr std::array<u32, static_cast<u32>(ChannelType::Max)> channel_syncpoints{
0x0, // `MsEnc` is unimplemented
0xC, // `VIC`
0x0, // `GPU` syncpoints are allocated per-channel instead
0x36, // `NvDec`
0x0, // `Display` is unimplemented
0x37, // `NvJpg`
0x0, // `TSec` is unimplemented
}; //!< Maps each channel ID to a constant syncpoint
private:
/**
* @brief Marks the given syncpoint as reserved
* @note reservation_lock should be locked when calling this
*/
u32 ReserveSyncpoint(u32 id, bool client_managed);
/**
* @return The ID of the first free syncpoint
*/
u32 FindFreeSyncpoint();
struct SyncpointInfo {
std::atomic<u32> counter_min; //!< The least value the syncpoint can be (The value it was
//!< when it was last synchronized with host1x)
std::atomic<u32> counter_max; //!< The maximum value the syncpoint can reach according to
//!< the current usage
bool interface_managed; //!< If the syncpoint is managed by a host1x client interface, a
//!< client interface is a HW block that can handle host1x
//!< transactions on behalf of a host1x client (Which would
//!< otherwise need to be manually synced using PIO which is
//!< synchronous and requires direct cooperation of the CPU)
bool reserved; //!< If the syncpoint is reserved or not, not to be confused with a reserved
//!< value
};
constexpr static std::size_t SyncpointCount{192}; //!< Number of entries in `syncpoints`
std::array<SyncpointInfo, SyncpointCount> syncpoints{}; //!< Cached state, indexed by ID
std::mutex reservation_lock; //!< Held while allocating or freeing syncpoints
Tegra::Host1x::Host1x& host1x; //!< Source of the host syncpoint values used by `UpdateMin`
};
} // namespace Service::Nvidia::NvCore

View File

@@ -11,6 +11,10 @@ namespace Core {
class System;
}
namespace Kernel {
class KEvent;
}
namespace Service::Nvidia::Devices {
/// Represents an abstract nvidia device node. It is to be subclassed by concrete device nodes to
@@ -64,6 +68,10 @@ public:
*/
virtual void OnClose(DeviceFD fd) = 0;
/// Default implementation for device nodes: ignores `event_id` and returns nullptr.
/// Subclasses may override this.
virtual Kernel::KEvent* QueryEvent(u32 event_id) {
return nullptr;
}
protected:
Core::System& system;
};

View File

@@ -5,15 +5,16 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
#include "core/perf_stats.h"
#include "video_core/gpu.h"
namespace Service::Nvidia::Devices {
nvdisp_disp0::nvdisp_disp0(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_)
: nvdevice{system_}, nvmap_dev{std::move(nvmap_dev_)} {}
nvdisp_disp0::nvdisp_disp0(Core::System& system_, NvCore::Container& core)
: nvdevice{system_}, container{core}, nvmap{core.GetNvMapFile()} {}
nvdisp_disp0::~nvdisp_disp0() = default;
NvResult nvdisp_disp0::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -39,8 +40,9 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width,
u32 height, u32 stride, android::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect) {
const VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
const Common::Rectangle<int>& crop_rect,
std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) {
const VAddr addr = nvmap.GetHandleAddress(buffer_handle);
LOG_TRACE(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
addr, offset, width, height, stride, format);
@@ -48,10 +50,15 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form
const Tegra::FramebufferConfig framebuffer{addr, offset, width, height,
stride, format, transform, crop_rect};
system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences);
system.GetPerfStats().EndSystemFrame();
system.GPU().SwapBuffers(&framebuffer);
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
system.GetPerfStats().BeginSystemFrame();
}
// This device provides no events: every query is logged as critical and answered with nullptr.
Kernel::KEvent* nvdisp_disp0::QueryEvent(u32 event_id) {
LOG_CRITICAL(Service_NVDRV, "Unknown DISP Event {}", event_id);
return nullptr;
}
} // namespace Service::Nvidia::Devices

View File

@@ -11,13 +11,18 @@
#include "core/hle/service/nvflinger/buffer_transform_flags.h"
#include "core/hle/service/nvflinger/pixel_format.h"
namespace Service::Nvidia::NvCore {
class Container;
class NvMap;
} // namespace Service::Nvidia::NvCore
namespace Service::Nvidia::Devices {
class nvmap;
class nvdisp_disp0 final : public nvdevice {
public:
explicit nvdisp_disp0(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_);
explicit nvdisp_disp0(Core::System& system_, NvCore::Container& core);
~nvdisp_disp0() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -33,10 +38,14 @@ public:
/// Performs a screen flip, drawing the buffer pointed to by the handle.
void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height,
u32 stride, android::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect);
const Common::Rectangle<int>& crop_rect,
std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences);
Kernel::KEvent* QueryEvent(u32 event_id) override;
private:
std::shared_ptr<nvmap> nvmap_dev;
NvCore::Container& container;
NvCore::NvMap& nvmap;
};
} // namespace Service::Nvidia::Devices

View File

@@ -1,21 +1,30 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#include <cstring>
#include <utility>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "video_core/control/channel_state.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
namespace Service::Nvidia::Devices {
nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_)
: nvdevice{system_}, nvmap_dev{std::move(nvmap_dev_)} {}
nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
: nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, vm{},
gmmu{} {}
nvhost_as_gpu::~nvhost_as_gpu() = default;
NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -82,12 +91,52 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
IoctlAllocAsEx params{};
std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, big_page_size=0x{:X}", params.big_page_size);
if (params.big_page_size == 0) {
params.big_page_size = DEFAULT_BIG_PAGE_SIZE;
LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size);
std::scoped_lock lock(mutex);
if (vm.initialised) {
ASSERT_MSG(false, "Cannot initialise an address space twice!");
return NvResult::InvalidState;
}
big_page_size = params.big_page_size;
if (params.big_page_size) {
if (!std::has_single_bit(params.big_page_size)) {
LOG_ERROR(Service_NVDRV, "Non power-of-2 big page size: 0x{:X}!", params.big_page_size);
return NvResult::BadValue;
}
if ((params.big_page_size & VM::SUPPORTED_BIG_PAGE_SIZES) == 0) {
LOG_ERROR(Service_NVDRV, "Unsupported big page size: 0x{:X}!", params.big_page_size);
return NvResult::BadValue;
}
vm.big_page_size = params.big_page_size;
vm.big_page_size_bits = static_cast<u32>(std::countr_zero(params.big_page_size));
vm.va_range_start = params.big_page_size << VM::VA_START_SHIFT;
}
// If this is unspecified then default values should be used
if (params.va_range_start) {
vm.va_range_start = params.va_range_start;
vm.va_range_split = params.va_range_split;
vm.va_range_end = params.va_range_end;
}
const auto start_pages{static_cast<u32>(vm.va_range_start >> VM::PAGE_SIZE_BITS)};
const auto end_pages{static_cast<u32>(vm.va_range_split >> VM::PAGE_SIZE_BITS)};
vm.small_page_allocator = std::make_shared<VM::Allocator>(start_pages, end_pages);
const auto start_big_pages{static_cast<u32>(vm.va_range_split >> vm.big_page_size_bits)};
const auto end_big_pages{
static_cast<u32>((vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits)};
vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages);
gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, vm.big_page_size_bits,
VM::PAGE_SIZE_BITS);
system.GPU().InitAddressSpace(*gmmu);
vm.initialised = true;
return NvResult::Success;
}
@@ -99,21 +148,76 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages,
params.page_size, params.flags);
const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) {
params.offset = *system.GPU().MemoryManager().AllocateFixed(params.offset, size);
} else {
params.offset = system.GPU().MemoryManager().Allocate(size, params.align);
std::scoped_lock lock(mutex);
if (!vm.initialised) {
return NvResult::BadValue;
}
auto result = NvResult::Success;
if (!params.offset) {
LOG_CRITICAL(Service_NVDRV, "allocation failed for size {}", size);
result = NvResult::InsufficientMemory;
if (params.page_size != VM::YUZU_PAGESIZE && params.page_size != vm.big_page_size) {
return NvResult::BadValue;
}
if (params.page_size != vm.big_page_size &&
((params.flags & MappingFlags::Sparse) != MappingFlags::None)) {
UNIMPLEMENTED_MSG("Sparse small pages are not implemented!");
return NvResult::NotImplemented;
}
const u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS
: vm.big_page_size_bits};
auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator
: *vm.big_page_allocator};
if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
allocator.AllocateFixed(static_cast<u32>(params.offset >> page_size_bits), params.pages);
} else {
params.offset = static_cast<u64>(allocator.Allocate(params.pages)) << page_size_bits;
if (!params.offset) {
ASSERT_MSG(false, "Failed to allocate free space in the GPU AS!");
return NvResult::InsufficientMemory;
}
}
u64 size{static_cast<u64>(params.pages) * params.page_size};
if ((params.flags & MappingFlags::Sparse) != MappingFlags::None) {
gmmu->MapSparse(params.offset, size);
}
allocation_map[params.offset] = {
.size = size,
.mappings{},
.page_size = params.page_size,
.sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None,
.big_pages = params.page_size != VM::YUZU_PAGESIZE,
};
std::memcpy(output.data(), &params, output.size());
return result;
return NvResult::Success;
}
// Releases the mapping registered at `offset`: hands its pages back to the matching page
// allocator unless the mapping is fixed, resets the GPU address range, then drops the entry
// from `mapping_map`. Throws via `at` if no mapping is registered at `offset`.
// NOTE(review): the name suggests the caller must already hold this object's mutex - confirm.
void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
    auto mapping{mapping_map.at(offset)};

    if (!mapping->fixed) {
        // Big-page and small-page mappings come from different allocators and page sizes
        const u32 shift{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
        auto& page_allocator{mapping->big_page ? *vm.big_page_allocator
                                               : *vm.small_page_allocator};

        const auto first_page{static_cast<u32>(mapping->offset >> shift)};
        const auto page_count{static_cast<u32>(mapping->size >> shift)};
        page_allocator.Free(first_page, page_count);
    }

    // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
    // Only FreeSpace can unmap them fully
    if (!mapping->sparse_alloc) {
        gmmu->Unmap(offset, mapping->size);
    } else {
        gmmu->MapSparse(offset, mapping->size, mapping->big_page);
    }

    mapping_map.erase(offset);
}
NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -123,8 +227,40 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>&
LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset,
params.pages, params.page_size);
system.GPU().MemoryManager().Unmap(params.offset,
static_cast<std::size_t>(params.pages) * params.page_size);
std::scoped_lock lock(mutex);
if (!vm.initialised) {
return NvResult::BadValue;
}
try {
auto allocation{allocation_map[params.offset]};
if (allocation.page_size != params.page_size ||
allocation.size != (static_cast<u64>(params.pages) * params.page_size)) {
return NvResult::BadValue;
}
for (const auto& mapping : allocation.mappings) {
FreeMappingLocked(mapping->offset);
}
// Unset sparse flag if required
if (allocation.sparse) {
gmmu->Unmap(params.offset, allocation.size);
}
auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator
: *vm.big_page_allocator};
u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS
: vm.big_page_size_bits};
allocator.Free(static_cast<u32>(params.offset >> page_size_bits),
static_cast<u32>(allocation.size >> page_size_bits));
allocation_map.erase(params.offset);
} catch (const std::out_of_range&) {
return NvResult::BadValue;
}
std::memcpy(output.data(), &params, output.size());
return NvResult::Success;
@@ -135,35 +271,52 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries);
auto result = NvResult::Success;
std::vector<IoctlRemapEntry> entries(num_entries);
std::memcpy(entries.data(), input.data(), input.size());
for (const auto& entry : entries) {
LOG_DEBUG(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
entry.offset, entry.nvmap_handle, entry.pages);
std::scoped_lock lock(mutex);
const auto object{nvmap_dev->GetObject(entry.nvmap_handle)};
if (!object) {
LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", entry.nvmap_handle);
result = NvResult::InvalidState;
break;
if (!vm.initialised) {
return NvResult::BadValue;
}
for (const auto& entry : entries) {
GPUVAddr virtual_address{static_cast<u64>(entry.as_offset_big_pages)
<< vm.big_page_size_bits};
u64 size{static_cast<u64>(entry.big_pages) << vm.big_page_size_bits};
auto alloc{allocation_map.upper_bound(virtual_address)};
if (alloc-- == allocation_map.begin() ||
(virtual_address - alloc->first) + size > alloc->second.size) {
LOG_WARNING(Service_NVDRV, "Cannot remap into an unallocated region!");
return NvResult::BadValue;
}
const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
const auto size{static_cast<u64>(entry.pages) << 0x10};
const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10};
const auto addr{system.GPU().MemoryManager().Map(object->addr + map_offset, offset, size)};
if (!alloc->second.sparse) {
LOG_WARNING(Service_NVDRV, "Cannot remap a non-sparse mapping!");
return NvResult::BadValue;
}
if (!addr) {
LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!");
result = NvResult::InvalidState;
break;
const bool use_big_pages = alloc->second.big_pages;
if (!entry.handle) {
gmmu->MapSparse(virtual_address, size, use_big_pages);
} else {
auto handle{nvmap.GetHandle(entry.handle)};
if (!handle) {
return NvResult::BadValue;
}
VAddr cpu_address{static_cast<VAddr>(
handle->address +
(static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
gmmu->Map(virtual_address, cpu_address, size, use_big_pages);
}
}
std::memcpy(output.data(), entries.data(), output.size());
return result;
return NvResult::Success;
}
NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -173,79 +326,98 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
LOG_DEBUG(Service_NVDRV,
"called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}"
", offset={}",
params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size,
params.flags, params.handle, params.buffer_offset, params.mapping_size,
params.offset);
const auto object{nvmap_dev->GetObject(params.nvmap_handle)};
if (!object) {
LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", params.nvmap_handle);
std::memcpy(output.data(), &params, output.size());
return NvResult::InvalidState;
std::scoped_lock lock(mutex);
if (!vm.initialised) {
return NvResult::BadValue;
}
// The real nvservices doesn't make a distinction between handles and ids, and
// object can only have one handle and it will be the same as its id. Assert that this is the
// case to prevent unexpected behavior.
ASSERT(object->id == params.nvmap_handle);
auto& gpu = system.GPU();
// Remaps a subregion of an existing mapping to a different PA
if ((params.flags & MappingFlags::Remap) != MappingFlags::None) {
try {
auto mapping{mapping_map.at(params.offset)};
u64 page_size{params.page_size};
if (!page_size) {
page_size = object->align;
}
if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) {
if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) {
const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)};
const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)};
if (!gpu.MemoryManager().Map(cpu_addr, gpu_addr, params.mapping_size)) {
LOG_CRITICAL(Service_NVDRV,
"remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, "
"mapping_size = {}, offset={}",
params.flags, params.nvmap_handle, params.buffer_offset,
params.mapping_size, params.offset);
std::memcpy(output.data(), &params, output.size());
return NvResult::InvalidState;
if (mapping->size < params.mapping_size) {
LOG_WARNING(Service_NVDRV,
"Cannot remap a partially mapped GPU address space region: 0x{:X}",
params.offset);
return NvResult::BadValue;
}
std::memcpy(output.data(), &params, output.size());
return NvResult::Success;
} else {
LOG_CRITICAL(Service_NVDRV, "address not mapped offset={}", params.offset);
u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
VAddr cpu_address{mapping->ptr + params.buffer_offset};
std::memcpy(output.data(), &params, output.size());
return NvResult::InvalidState;
gmmu->Map(gpu_address, cpu_address, params.mapping_size, mapping->big_page);
return NvResult::Success;
} catch (const std::out_of_range&) {
LOG_WARNING(Service_NVDRV, "Cannot remap an unmapped GPU address space region: 0x{:X}",
params.offset);
return NvResult::BadValue;
}
}
// We can only map objects that have already been assigned a CPU address.
ASSERT(object->status == nvmap::Object::Status::Allocated);
const auto physical_address{object->addr + params.buffer_offset};
u64 size{params.mapping_size};
if (!size) {
size = object->size;
auto handle{nvmap.GetHandle(params.handle)};
if (!handle) {
return NvResult::BadValue;
}
const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None};
if (is_alloc) {
params.offset = gpu.MemoryManager().MapAllocate(physical_address, size, page_size);
} else {
params.offset = gpu.MemoryManager().Map(physical_address, params.offset, size);
}
VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)};
u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};
auto result = NvResult::Success;
if (!params.offset) {
LOG_CRITICAL(Service_NVDRV, "failed to map size={}", size);
result = NvResult::InvalidState;
bool big_page{[&]() {
if (Common::IsAligned(handle->align, vm.big_page_size)) {
return true;
} else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE)) {
return false;
} else {
ASSERT(false);
return false;
}
}()};
if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
auto alloc{allocation_map.upper_bound(params.offset)};
if (alloc-- == allocation_map.begin() ||
(params.offset - alloc->first) + size > alloc->second.size) {
ASSERT_MSG(false, "Cannot perform a fixed mapping into an unallocated region!");
return NvResult::BadValue;
}
const bool use_big_pages = alloc->second.big_pages && big_page;
gmmu->Map(params.offset, cpu_address, size, use_big_pages);
auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true,
use_big_pages, alloc->second.sparse)};
alloc->second.mappings.push_back(mapping);
mapping_map[params.offset] = mapping;
} else {
AddBufferMap(params.offset, size, physical_address, is_alloc);
auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE};
u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
params.offset = static_cast<u64>(allocator.Allocate(
static_cast<u32>(Common::AlignUp(size, page_size) >> page_size_bits)))
<< page_size_bits;
if (!params.offset) {
ASSERT_MSG(false, "Failed to allocate free space in the GPU AS!");
return NvResult::InsufficientMemory;
}
gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), big_page);
auto mapping{
std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
mapping_map[params.offset] = mapping;
}
std::memcpy(output.data(), &params, output.size());
return result;
return NvResult::Success;
}
NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -254,47 +426,82 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);
if (const auto size{RemoveBufferMap(params.offset)}; size) {
system.GPU().MemoryManager().Unmap(params.offset, *size);
} else {
LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset);
std::scoped_lock lock(mutex);
if (!vm.initialised) {
return NvResult::BadValue;
}
try {
auto mapping{mapping_map.at(params.offset)};
if (!mapping->fixed) {
auto& allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
allocator.Free(static_cast<u32>(mapping->offset >> page_size_bits),
static_cast<u32>(mapping->size >> page_size_bits));
}
// Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
// Only FreeSpace can unmap them fully
if (mapping->sparse_alloc) {
gmmu->MapSparse(params.offset, mapping->size, mapping->big_page);
} else {
gmmu->Unmap(params.offset, mapping->size);
}
mapping_map.erase(params.offset);
} catch (const std::out_of_range&) {
LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset);
}
std::memcpy(output.data(), &params, output.size());
return NvResult::Success;
}
/// Binds this address space to the GPU channel device identified by the `fd` in `input`.
///
/// The channel's `memory_manager` is pointed at this address space's `gmmu`. `output` is not
/// written. Always returns NvResult::Success.
NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) {
    IoctlBindChannel params{};
    // NOTE(review): copies input.size() bytes; assumes the caller never passes more than
    // sizeof(params) - confirm against the ioctl dispatch layer.
    std::memcpy(&params, input.data(), input.size());
    // The call is fully implemented, so it is logged at debug level only (the former
    // "(STUBBED)" warning was misleading and duplicated this message).
    LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);

    channel = params.fd;

    // NOTE(review): GetDevice's result is dereferenced unchecked; presumably the fd is always a
    // valid nvhost_gpu device here - verify.
    auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd);
    gpu_channel_device->channel_state->memory_manager = gmmu;
    return NvResult::Success;
}
/// Fills `params` with the two virtual-address regions managed by this address space:
/// index 0 describes the small-page region, index 1 the big-page region.
///
/// `buf_size` is forced to the size of both region descriptors. Offsets are reported in bytes,
/// `pages` as the distance between each allocator's VA limit and VA start.
void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) {
    params.buf_size = 2 * sizeof(VaRegion);

    // The allocators work in page units with a u32 address type (see VM::Allocator), so widen
    // to u64 *before* shifting back to a byte offset; shifting first would be done in 32-bit
    // arithmetic and could drop the high bits of the address.
    params.regions = std::array<VaRegion, 2>{
        VaRegion{
            .offset = static_cast<u64>(vm.small_page_allocator->GetVAStart())
                      << VM::PAGE_SIZE_BITS,
            .page_size = VM::YUZU_PAGESIZE,
            ._pad0_{},
            .pages = vm.small_page_allocator->GetVALimit() - vm.small_page_allocator->GetVAStart(),
        },
        VaRegion{
            .offset = static_cast<u64>(vm.big_page_allocator->GetVAStart())
                      << vm.big_page_size_bits,
            .page_size = vm.big_page_size,
            ._pad0_{},
            .pages = vm.big_page_allocator->GetVALimit() - vm.big_page_allocator->GetVAStart(),
        },
    };
}
NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetVaRegions params{};
std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
params.buf_size);
LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
params.buf_size);
params.buf_size = 0x30;
std::scoped_lock lock(mutex);
params.small = IoctlVaRegion{
.offset = 0x04000000,
.page_size = DEFAULT_SMALL_PAGE_SIZE,
.pages = 0x3fbfff,
};
if (!vm.initialised) {
return NvResult::BadValue;
}
params.big = IoctlVaRegion{
.offset = 0x04000000,
.page_size = big_page_size,
.pages = 0x1bffff,
};
// TODO(ogniK): This probably can stay stubbed but should add support way way later
GetVARegionsImpl(params);
std::memcpy(output.data(), &params, output.size());
return NvResult::Success;
@@ -305,62 +512,27 @@ NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u
IoctlGetVaRegions params{};
std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
params.buf_size);
LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
params.buf_size);
params.buf_size = 0x30;
std::scoped_lock lock(mutex);
params.small = IoctlVaRegion{
.offset = 0x04000000,
.page_size = 0x1000,
.pages = 0x3fbfff,
};
if (!vm.initialised) {
return NvResult::BadValue;
}
params.big = IoctlVaRegion{
.offset = 0x04000000,
.page_size = big_page_size,
.pages = 0x1bffff,
};
// TODO(ogniK): This probably can stay stubbed but should add support way way later
GetVARegionsImpl(params);
std::memcpy(output.data(), &params, output.size());
std::memcpy(inline_output.data(), &params.small, sizeof(IoctlVaRegion));
std::memcpy(inline_output.data() + sizeof(IoctlVaRegion), &params.big, sizeof(IoctlVaRegion));
std::memcpy(inline_output.data(), &params.regions[0], sizeof(VaRegion));
std::memcpy(inline_output.data() + sizeof(VaRegion), &params.regions[1], sizeof(VaRegion));
return NvResult::Success;
}
/// Looks up the buffer mapping whose [StartAddr, EndAddr) range contains `gpu_addr`.
///
/// Only entries keyed at or below `gpu_addr` are inspected, in ascending key order; the first
/// match is returned by value. Returns std::nullopt when no inspected entry contains the
/// address.
std::optional<nvhost_as_gpu::BufferMap> nvhost_as_gpu::FindBufferMap(GPUVAddr gpu_addr) const {
    const auto stop = buffer_mappings.upper_bound(gpu_addr);
    auto cursor = buffer_mappings.begin();
    while (cursor != stop) {
        const BufferMap& candidate = cursor->second;
        const bool outside =
            gpu_addr < candidate.StartAddr() || gpu_addr >= candidate.EndAddr();
        if (!outside) {
            return candidate;
        }
        ++cursor;
    }
    return std::nullopt;
}
/// Records a buffer mapping keyed by its GPU address, replacing any entry already stored under
/// the same key.
void nvhost_as_gpu::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr,
                                 bool is_allocated) {
    buffer_mappings[gpu_addr] = BufferMap{gpu_addr, size, cpu_addr, is_allocated};
}
std::optional<std::size_t> nvhost_as_gpu::RemoveBufferMap(GPUVAddr gpu_addr) {
if (const auto iter{buffer_mappings.find(gpu_addr)}; iter != buffer_mappings.end()) {
std::size_t size{};
if (iter->second.IsAllocated()) {
size = iter->second.Size();
}
buffer_mappings.erase(iter);
return size;
}
return std::nullopt;
// Event lookup for the address-space device: every event id is logged as unknown and
// nullptr is returned.
Kernel::KEvent* nvhost_as_gpu::QueryEvent(u32 event_id) {
LOG_CRITICAL(Service_NVDRV, "Unknown AS GPU Event {}", event_id);
return nullptr;
}
} // namespace Service::Nvidia::Devices

View File

@@ -1,35 +1,50 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <bit>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <vector>
#include "common/address_space.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/swap.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
namespace Tegra {
class MemoryManager;
} // namespace Tegra
namespace Service::Nvidia {
class Module;
}
namespace Service::Nvidia::NvCore {
class Container;
class NvMap;
} // namespace Service::Nvidia::NvCore
namespace Service::Nvidia::Devices {
constexpr u32 DEFAULT_BIG_PAGE_SIZE = 1 << 16;
constexpr u32 DEFAULT_SMALL_PAGE_SIZE = 1 << 12;
class nvmap;
enum class AddressSpaceFlags : u32 {
None = 0x0,
FixedOffset = 0x1,
Remap = 0x100,
enum class MappingFlags : u32 {
None = 0,
Fixed = 1 << 0,
Sparse = 1 << 1,
Remap = 1 << 8,
};
DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags);
DECLARE_ENUM_FLAG_OPERATORS(MappingFlags);
class nvhost_as_gpu final : public nvdevice {
public:
explicit nvhost_as_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_);
explicit nvhost_as_gpu(Core::System& system_, Module& module, NvCore::Container& core);
~nvhost_as_gpu() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -42,46 +57,17 @@ public:
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
private:
class BufferMap final {
public:
constexpr BufferMap() = default;
Kernel::KEvent* QueryEvent(u32 event_id) override;
constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_)
: start_addr{start_addr_}, end_addr{start_addr_ + size_} {}
constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_, VAddr cpu_addr_,
bool is_allocated_)
: start_addr{start_addr_}, end_addr{start_addr_ + size_}, cpu_addr{cpu_addr_},
is_allocated{is_allocated_} {}
constexpr VAddr StartAddr() const {
return start_addr;
}
constexpr VAddr EndAddr() const {
return end_addr;
}
constexpr std::size_t Size() const {
return end_addr - start_addr;
}
constexpr VAddr CpuAddr() const {
return cpu_addr;
}
constexpr bool IsAllocated() const {
return is_allocated;
}
private:
GPUVAddr start_addr{};
GPUVAddr end_addr{};
VAddr cpu_addr{};
bool is_allocated{};
struct VaRegion {
u64 offset;
u32 page_size;
u32 _pad0_;
u64 pages;
};
static_assert(sizeof(VaRegion) == 0x18);
private:
struct IoctlAllocAsEx {
u32_le flags{}; // usually passes 1
s32_le as_fd{}; // ignored; passes 0
@@ -96,7 +82,7 @@ private:
struct IoctlAllocSpace {
u32_le pages{};
u32_le page_size{};
AddressSpaceFlags flags{};
MappingFlags flags{};
INSERT_PADDING_WORDS(1);
union {
u64_le offset;
@@ -113,19 +99,19 @@ private:
static_assert(sizeof(IoctlFreeSpace) == 16, "IoctlFreeSpace is incorrect size");
struct IoctlRemapEntry {
u16_le flags{};
u16_le kind{};
u32_le nvmap_handle{};
u32_le map_offset{};
u32_le offset{};
u32_le pages{};
u16 flags;
u16 kind;
NvCore::NvMap::Handle::Id handle;
u32 handle_offset_big_pages;
u32 as_offset_big_pages;
u32 big_pages;
};
static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size");
struct IoctlMapBufferEx {
AddressSpaceFlags flags{}; // bit0: fixed_offset, bit2: cacheable
u32_le kind{}; // -1 is default
u32_le nvmap_handle{};
MappingFlags flags{}; // bit0: fixed_offset, bit2: cacheable
u32_le kind{}; // -1 is default
NvCore::NvMap::Handle::Id handle;
u32_le page_size{}; // 0 means don't care
s64_le buffer_offset{};
u64_le mapping_size{};
@@ -143,27 +129,15 @@ private:
};
static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size");
struct IoctlVaRegion {
u64_le offset{};
u32_le page_size{};
INSERT_PADDING_WORDS(1);
u64_le pages{};
};
static_assert(sizeof(IoctlVaRegion) == 24, "IoctlVaRegion is incorrect size");
struct IoctlGetVaRegions {
u64_le buf_addr{}; // (contained output user ptr on linux, ignored)
u32_le buf_size{}; // forced to 2*sizeof(struct va_region)
u32_le reserved{};
IoctlVaRegion small{};
IoctlVaRegion big{};
std::array<VaRegion, 2> regions{};
};
static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2,
static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2,
"IoctlGetVaRegions is incorrect size");
s32 channel{};
u32 big_page_size{DEFAULT_BIG_PAGE_SIZE};
NvResult AllocAsEx(const std::vector<u8>& input, std::vector<u8>& output);
NvResult AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output);
NvResult Remap(const std::vector<u8>& input, std::vector<u8>& output);
@@ -172,18 +146,75 @@ private:
NvResult FreeSpace(const std::vector<u8>& input, std::vector<u8>& output);
NvResult BindChannel(const std::vector<u8>& input, std::vector<u8>& output);
void GetVARegionsImpl(IoctlGetVaRegions& params);
NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output);
std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const;
void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
void FreeMappingLocked(u64 offset);
std::shared_ptr<nvmap> nvmap_dev;
Module& module;
// This is expected to be ordered, therefore we must use a map, not unordered_map
std::map<GPUVAddr, BufferMap> buffer_mappings;
NvCore::Container& container;
NvCore::NvMap& nvmap;
// Describes one buffer mapped into the GPU address space by MapBufferEx.
struct Mapping {
// CPU address the mapping points at.
VAddr ptr;
// GPU virtual address at which the mapping starts.
u64 offset;
// Size of the mapping as recorded by MapBufferEx.
u64 size;
// True when the mapping was placed at a caller-chosen offset inside an allocation;
// false when its range came from one of the page allocators.
bool fixed;
// NOTE(review): the original remark said "Only valid if fixed == false", yet MapBufferEx also
// sets this for fixed mappings and the unmap paths read it for sparse mappings - confirm.
bool big_page; // Only valid if fixed == false
// True when the enclosing allocation is sparse; unmapping then restores the sparse state.
bool sparse_alloc;
Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_)
: ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_),
sparse_alloc(sparse_alloc_) {}
};
// Describes one region reserved through AllocateSpace. Member order matters: the struct is
// filled with designated initialisers.
struct Allocation {
// Size of the region in bytes (pages * page_size).
u64 size;
// Fixed mappings that MapBufferEx has placed inside this region.
std::list<std::shared_ptr<Mapping>> mappings;
// Page size the region was requested with.
u32 page_size;
// True when the region was requested with the Sparse flag.
bool sparse;
// True when page_size differs from VM::YUZU_PAGESIZE.
bool big_pages;
};
std::map<u64, std::shared_ptr<Mapping>>
mapping_map; //!< This maps the base addresses of mapped buffers to their total sizes and
//!< mapping type, this is needed as what was originally a single buffer may
//!< have been split into multiple GPU side buffers with the remap flag.
std::map<u64, Allocation> allocation_map; //!< Holds allocations created by AllocSpace from
//!< which fixed buffers can be mapped into
std::mutex mutex; //!< Locks all AS operations
// Layout parameters and page allocators of this GPU address space.
struct VM {
// Small page size in bytes and the matching shift amount.
static constexpr u32 YUZU_PAGESIZE{0x1000};
static constexpr u32 PAGE_SIZE_BITS{std::countr_zero(YUZU_PAGESIZE)};
// NOTE(review): presumably a bitmask of the accepted big page sizes (0x10000 | 0x20000) -
// confirm against its use in AllocAsEx.
static constexpr u32 SUPPORTED_BIG_PAGE_SIZES{0x30000};
static constexpr u32 DEFAULT_BIG_PAGE_SIZE{0x20000};
// Big page size currently in effect and its shift amount; both start at the default.
u32 big_page_size{DEFAULT_BIG_PAGE_SIZE};
u32 big_page_size_bits{std::countr_zero(DEFAULT_BIG_PAGE_SIZE)};
static constexpr u32 VA_START_SHIFT{10};
static constexpr u64 DEFAULT_VA_SPLIT{1ULL << 34};
static constexpr u64 DEFAULT_VA_RANGE{1ULL << 37};
// Default bounds of the address range: start = default big page size << VA_START_SHIFT.
u64 va_range_start{DEFAULT_BIG_PAGE_SIZE << VA_START_SHIFT};
u64 va_range_split{DEFAULT_VA_SPLIT};
u64 va_range_end{DEFAULT_VA_RANGE};
// Page-granular allocator with a 32-bit address type.
using Allocator = Common::FlatAllocator<u32, 0, 32>;
std::unique_ptr<Allocator> big_page_allocator;
std::shared_ptr<Allocator>
small_page_allocator; //! Shared as this is also used by nvhost::GpuChannel
// Ioctls in this device return BadValue while this is false.
bool initialised{};
} vm;
std::shared_ptr<Tegra::MemoryManager> gmmu;
// s32 channel{};
// u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
};
} // namespace Service::Nvidia::Devices

View File

@@ -1,24 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#include <bit>
#include <cstdlib>
#include <cstring>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/hle/kernel/k_event.h"
#include "core/hle/kernel/k_writable_event.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
#include "core/hle/service/nvdrv/devices/nvhost_ctrl.h"
#include "video_core/gpu.h"
#include "video_core/host1x/host1x.h"
namespace Service::Nvidia::Devices {
nvhost_ctrl::nvhost_ctrl(Core::System& system_, EventInterface& events_interface_,
SyncpointManager& syncpoint_manager_)
: nvdevice{system_}, events_interface{events_interface_}, syncpoint_manager{
syncpoint_manager_} {}
nvhost_ctrl::~nvhost_ctrl() = default;
NvCore::Container& core_)
: nvdevice{system_}, events_interface{events_interface_}, core{core_},
syncpoint_manager{core_.GetSyncpointManager()} {}
/// Hands the kernel event of every still-registered slot back to the events interface.
/// Slots that are not registered are left untouched.
nvhost_ctrl::~nvhost_ctrl() {
    for (auto& slot_entry : events) {
        if (slot_entry.registered) {
            events_interface.FreeEvent(slot_entry.kevent);
        }
    }
}
NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) {
@@ -30,13 +45,15 @@ NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>&
case 0x1c:
return IocCtrlClearEventWait(input, output);
case 0x1d:
return IocCtrlEventWait(input, output, false);
case 0x1e:
return IocCtrlEventWait(input, output, true);
case 0x1e:
return IocCtrlEventWait(input, output, false);
case 0x1f:
return IocCtrlEventRegister(input, output);
case 0x20:
return IocCtrlEventUnregister(input, output);
case 0x21:
return IocCtrlEventUnregisterBatch(input, output);
}
break;
default:
@@ -60,6 +77,7 @@ NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>&
}
// No work is performed when a descriptor is opened on this device.
void nvhost_ctrl::OnOpen(DeviceFD fd) {}
// No work is performed when a descriptor on this device is closed.
void nvhost_ctrl::OnClose(DeviceFD fd) {}
NvResult nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -71,116 +89,167 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector
}
NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output,
bool is_async) {
bool is_allocation) {
IocCtrlEventWaitParams params{};
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}",
params.syncpt_id, params.threshold, params.timeout, is_async);
LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_allocation={}",
params.fence.id, params.fence.value, params.timeout, is_allocation);
if (params.syncpt_id >= MaxSyncPoints) {
bool must_unmark_fail = !is_allocation;
const u32 event_id = params.value.raw;
SCOPE_EXIT({
std::memcpy(output.data(), &params, sizeof(params));
if (must_unmark_fail) {
events[event_id].fails = 0;
}
});
const u32 fence_id = static_cast<u32>(params.fence.id);
if (fence_id >= MaxSyncPoints) {
return NvResult::BadParameter;
}
u32 event_id = params.value & 0x00FF;
if (params.fence.value == 0) {
if (!syncpoint_manager.IsSyncpointAllocated(params.fence.id)) {
LOG_WARNING(Service_NVDRV,
"Unallocated syncpt_id={}, threshold={}, timeout={}, is_allocation={}",
params.fence.id, params.fence.value, params.timeout, is_allocation);
} else {
params.value.raw = syncpoint_manager.ReadSyncpointMinValue(fence_id);
}
return NvResult::Success;
}
if (event_id >= MaxNvEvents) {
std::memcpy(output.data(), &params, sizeof(params));
if (syncpoint_manager.IsFenceSignalled(params.fence)) {
params.value.raw = syncpoint_manager.ReadSyncpointMinValue(fence_id);
return NvResult::Success;
}
if (const auto new_value = syncpoint_manager.UpdateMin(fence_id);
syncpoint_manager.IsFenceSignalled(params.fence)) {
params.value.raw = new_value;
return NvResult::Success;
}
auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager();
const u32 target_value = params.fence.value;
auto lock = NvEventsLock();
u32 slot = [&]() {
if (is_allocation) {
params.value.raw = 0;
return FindFreeNvEvent(fence_id);
} else {
return params.value.raw;
}
}();
must_unmark_fail = false;
const auto check_failing = [&]() {
if (events[slot].fails > 2) {
{
auto lk = system.StallProcesses();
host1x_syncpoint_manager.WaitHost(fence_id, target_value);
system.UnstallProcesses();
}
params.value.raw = target_value;
return true;
}
return false;
};
if (slot >= MaxNvEvents) {
return NvResult::BadParameter;
}
if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id);
std::memcpy(output.data(), &params, sizeof(params));
events_interface.failed[event_id] = false;
return NvResult::Success;
}
if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id);
syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
params.value = new_value;
std::memcpy(output.data(), &params, sizeof(params));
events_interface.failed[event_id] = false;
return NvResult::Success;
}
auto& event = events_interface.events[event_id];
auto& gpu = system.GPU();
// This is mostly to take into account unimplemented features. As synced
// gpu is always synced.
if (!gpu.IsAsync()) {
event.event->GetWritableEvent().Signal();
return NvResult::Success;
}
const u32 current_syncpoint_value = event.fence.value;
const s32 diff = current_syncpoint_value - params.threshold;
if (diff >= 0) {
event.event->GetWritableEvent().Signal();
params.value = current_syncpoint_value;
std::memcpy(output.data(), &params, sizeof(params));
events_interface.failed[event_id] = false;
return NvResult::Success;
}
const u32 target_value = current_syncpoint_value - diff;
if (!is_async) {
params.value = 0;
}
if (params.timeout == 0) {
std::memcpy(output.data(), &params, sizeof(params));
if (check_failing()) {
events[slot].fails = 0;
return NvResult::Success;
}
return NvResult::Timeout;
}
EventState status = events_interface.status[event_id];
const bool bad_parameter = status == EventState::Busy;
if (bad_parameter) {
std::memcpy(output.data(), &params, sizeof(params));
auto& event = events[slot];
if (!event.registered) {
return NvResult::BadParameter;
}
events_interface.SetEventStatus(event_id, EventState::Waiting);
events_interface.assigned_syncpt[event_id] = params.syncpt_id;
events_interface.assigned_value[event_id] = target_value;
if (is_async) {
params.value = params.syncpt_id << 4;
} else {
params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
if (event.IsBeingUsed()) {
return NvResult::BadParameter;
}
params.value |= event_id;
event.event->GetWritableEvent().Clear();
if (events_interface.failed[event_id]) {
{
auto lk = system.StallProcesses();
gpu.WaitFence(params.syncpt_id, target_value);
system.UnstallProcesses();
}
std::memcpy(output.data(), &params, sizeof(params));
events_interface.failed[event_id] = false;
if (check_failing()) {
event.fails = 0;
return NvResult::Success;
}
gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
std::memcpy(output.data(), &params, sizeof(params));
params.value.raw = 0;
event.status.store(EventState::Waiting, std::memory_order_release);
event.assigned_syncpt = fence_id;
event.assigned_value = target_value;
if (is_allocation) {
params.value.syncpoint_id_for_allocation.Assign(static_cast<u16>(fence_id));
params.value.event_allocated.Assign(1);
} else {
params.value.syncpoint_id.Assign(fence_id);
}
params.value.raw |= slot;
event.wait_handle =
host1x_syncpoint_manager.RegisterHostAction(fence_id, target_value, [this, slot]() {
auto& event_ = events[slot];
if (event_.status.exchange(EventState::Signalling, std::memory_order_acq_rel) ==
EventState::Waiting) {
event_.kevent->GetWritableEvent().Signal();
}
event_.status.store(EventState::Signalled, std::memory_order_release);
});
return NvResult::Timeout;
}
/// Releases the event stored in `slot`.
///
/// Callers in this file take the NvEvents lock before calling.
/// @returns BadParameter when `slot` is out of range, Busy when the event is still in use,
///          and Success otherwise (including when the slot was never registered).
NvResult nvhost_ctrl::FreeEvent(u32 slot) {
    if (slot >= MaxNvEvents) {
        return NvResult::BadParameter;
    }

    auto& target = events[slot];
    if (target.registered) {
        if (target.IsBeingUsed()) {
            return NvResult::Busy;
        }
        FreeNvEvent(slot);
    }
    return NvResult::Success;
}
NvResult nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) {
IocCtrlEventRegisterParams params{};
std::memcpy(&params, input.data(), sizeof(params));
const u32 event_id = params.user_event_id & 0x00FF;
const u32 event_id = params.user_event_id;
LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id);
if (event_id >= MaxNvEvents) {
return NvResult::BadParameter;
}
if (events_interface.registered[event_id]) {
const auto event_state = events_interface.status[event_id];
if (event_state != EventState::Free) {
LOG_WARNING(Service_NVDRV, "Event already registered! Unregistering previous event");
events_interface.UnregisterEvent(event_id);
} else {
return NvResult::BadParameter;
auto lock = NvEventsLock();
if (events[event_id].registered) {
const auto result = FreeEvent(event_id);
if (result != NvResult::Success) {
return result;
}
}
events_interface.RegisterEvent(event_id);
CreateNvEvent(event_id);
return NvResult::Success;
}
@@ -190,34 +259,142 @@ NvResult nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input,
std::memcpy(&params, input.data(), sizeof(params));
const u32 event_id = params.user_event_id & 0x00FF;
LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id);
if (event_id >= MaxNvEvents) {
return NvResult::BadParameter;
auto lock = NvEventsLock();
return FreeEvent(event_id);
}
/// Unregisters every event whose bit is set in the `user_events` mask read from `input`.
///
/// Bits are processed from least to most significant while holding the NvEvents lock. The
/// first FreeEvent failure aborts the batch and is returned; events freed before that point
/// stay freed. `output` is not written.
NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(const std::vector<u8>& input,
                                                  std::vector<u8>& output) {
    IocCtrlEventUnregisterBatchParams params{};
    std::memcpy(&params, input.data(), sizeof(params));
    u64 event_mask = params.user_events;
    LOG_DEBUG(Service_NVDRV, " called, event_mask: {:X}", event_mask);

    auto lock = NvEventsLock();
    while (event_mask != 0) {
        // Lowest set bit = next slot to free; clear it so the loop terminates.
        const u64 event_id = std::countr_zero(event_mask);
        event_mask &= ~(1ULL << event_id);
        const auto result = FreeEvent(static_cast<u32>(event_id));
        if (result != NvResult::Success) {
            return result;
        }
    }
    // Every requested slot has been handled by FreeEvent above; no further per-event
    // bookkeeping is needed (and `event_id` is not in scope here).
    return NvResult::Success;
}
NvResult nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
IocCtrlEventSignalParams params{};
IocCtrlEventClearParams params{};
std::memcpy(&params, input.data(), sizeof(params));
u32 event_id = params.event_id & 0x00FF;
LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id);
u32 event_id = params.event_id.slot;
LOG_DEBUG(Service_NVDRV, "called, event_id: {:X}", event_id);
if (event_id >= MaxNvEvents) {
return NvResult::BadParameter;
}
if (events_interface.status[event_id] == EventState::Waiting) {
events_interface.LiberateEvent(event_id);
}
events_interface.failed[event_id] = true;
syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id);
auto lock = NvEventsLock();
auto& event = events[event_id];
if (event.status.exchange(EventState::Cancelling, std::memory_order_acq_rel) ==
EventState::Waiting) {
auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager();
host1x_syncpoint_manager.DeregisterHostAction(event.assigned_syncpt, event.wait_handle);
syncpoint_manager.UpdateMin(event.assigned_syncpt);
event.wait_handle = {};
}
event.fails++;
event.status.store(EventState::Cancelled, std::memory_order_release);
event.kevent->GetWritableEvent().Clear();
return NvResult::Success;
}
/// Resolves a packed event id to the kernel event registered for it.
///
/// @param event_id Raw 32-bit value, decoded through SyncpointEventValue.
/// @return The kernel event stored in the decoded slot when that slot is registered and
///         assigned to the decoded syncpoint; nullptr otherwise.
Kernel::KEvent* nvhost_ctrl::QueryEvent(u32 event_id) {
    const SyncpointEventValue requested{.raw = event_id};
    const bool is_allocated = requested.event_allocated.Value() != 0;

    // The slot lives in a different field depending on the "allocated" flag.
    u32 slot_index;
    if (is_allocated) {
        slot_index = requested.partial_slot.Value();
    } else {
        slot_index = static_cast<u32>(requested.slot);
    }
    if (slot_index >= MaxNvEvents) {
        ASSERT(false);
        return nullptr;
    }

    // Same for the syncpoint id.
    u32 wanted_syncpt;
    if (is_allocated) {
        wanted_syncpt = requested.syncpoint_id_for_allocation.Value();
    } else {
        wanted_syncpt = requested.syncpoint_id.Value();
    }

    // Hold the events mutex while the slot is inspected.
    auto guard = NvEventsLock();
    auto& entry = events[slot_index];
    if (!entry.registered || entry.assigned_syncpt != wanted_syncpt) {
        // Is this possible in hardware?
        ASSERT_MSG(false, "Slot:{}, SyncpointID:{}, requested", slot_index, wanted_syncpt);
        return nullptr;
    }

    ASSERT(entry.kevent);
    return entry.kevent;
}
/// Acquires events_mutex and hands the owning lock to the caller; the mutex is released when
/// the returned lock is destroyed.
std::unique_lock<std::mutex> nvhost_ctrl::NvEventsLock() {
    return std::unique_lock{events_mutex};
}
/// Brings the event slot `event_id` into the registered state.
///
/// The slot must currently have no kernel event, be unregistered and not be in use (asserted).
/// A kernel event named "NVCTRL::NvEvent_<id>" is created for it, its bookkeeping is reset and
/// the matching bit in events_mask is set.
void nvhost_ctrl::CreateNvEvent(u32 event_id) {
    auto& slot = events[event_id];
    ASSERT(!slot.kevent);
    ASSERT(!slot.registered);
    ASSERT(!slot.IsBeingUsed());

    slot.kevent = events_interface.CreateEvent(fmt::format("NVCTRL::NvEvent_{}", event_id));
    slot.status.store(EventState::Available);
    slot.registered = true;
    slot.fails = 0;
    slot.assigned_syncpt = 0;

    // Record the slot as registered in the bitmask.
    events_mask |= 1ULL << event_id;
}
/// Returns the event slot `event_id` to the unregistered state.
///
/// The slot must own a kernel event, be registered and not be in use (asserted). The kernel
/// event is handed back to the events interface and the slot's bit in events_mask is cleared.
void nvhost_ctrl::FreeNvEvent(u32 event_id) {
    auto& slot = events[event_id];
    ASSERT(slot.kevent);
    ASSERT(slot.registered);
    ASSERT(!slot.IsBeingUsed());

    events_interface.FreeEvent(slot.kevent);
    slot.kevent = nullptr;
    slot.status.store(EventState::Available);
    slot.registered = false;

    // Drop the slot from the registered bitmask.
    events_mask &= ~(1ULL << event_id);
}
/// Picks an event slot that can be used for `syncpoint_id`.
///
/// Preference order:
///   1. a registered, idle slot already assigned to `syncpoint_id` (returned immediately);
///   2. the first unregistered slot, which is registered via CreateNvEvent before returning;
///   3. the last registered, idle slot seen during the scan.
///
/// @return The chosen slot index. When nothing is available a critical message is logged and
///         0 is returned.
///         NOTE(review): 0 is also a valid slot index, so callers cannot tell this failure
///         apart from "slot 0" - confirm whether a sentinel such as MaxNvEvents is wanted.
u32 nvhost_ctrl::FindFreeNvEvent(u32 syncpoint_id) {
    u32 reusable_slot{MaxNvEvents};
    u32 unregistered_slot{MaxNvEvents};

    for (u32 index = 0; index < MaxNvEvents; ++index) {
        auto& candidate = events[index];

        if (!candidate.registered) {
            // Remember only the first unregistered slot.
            if (unregistered_slot == MaxNvEvents) {
                unregistered_slot = index;
            }
            continue;
        }
        if (candidate.IsBeingUsed()) {
            continue;
        }

        reusable_slot = index;
        if (candidate.assigned_syncpt == syncpoint_id) {
            return reusable_slot;
        }
    }

    if (unregistered_slot < MaxNvEvents) {
        CreateNvEvent(unregistered_slot);
        return unregistered_slot;
    }
    if (reusable_slot < MaxNvEvents) {
        return reusable_slot;
    }

    LOG_CRITICAL(Service_NVDRV, "Failed to allocate an event");
    return 0;
}
} // namespace Service::Nvidia::Devices

View File

@@ -1,20 +1,28 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <array>
#include <vector>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "video_core/host1x/syncpoint_manager.h"
namespace Service::Nvidia::NvCore {
class Container;
class SyncpointManager;
} // namespace Service::Nvidia::NvCore
namespace Service::Nvidia::Devices {
class nvhost_ctrl final : public nvdevice {
public:
explicit nvhost_ctrl(Core::System& system_, EventInterface& events_interface_,
SyncpointManager& syncpoint_manager_);
NvCore::Container& core);
~nvhost_ctrl() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -27,7 +35,70 @@ public:
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
Kernel::KEvent* QueryEvent(u32 event_id) override;
// Packed 32-bit event identifier with two overlapping interpretations of the same storage.
// nvhost_ctrl::QueryEvent fills `raw` and then reads `event_allocated`: when it is non-zero
// the slot is taken from `partial_slot` and the syncpoint from `syncpoint_id_for_allocation`;
// otherwise the slot is taken from `slot` and the syncpoint from `syncpoint_id`.
union SyncpointEventValue {
// The whole value as a plain integer.
u32 raw;
// View over the full 32-bit word.
// NOTE(review): presumably BitField<position, bits, type>, i.e. a 4-bit slot followed by a
// 28-bit syncpoint id - confirm against common/bit_field.h.
union {
BitField<0, 4, u32> partial_slot;
BitField<4, 28, u32> syncpoint_id;
};
// View as two 16-bit halves: a full slot index followed by a 16-bit word holding the
// syncpoint id and the "allocated" flag.
struct {
u16 slot;
union {
BitField<0, 12, u16> syncpoint_id_for_allocation;
BitField<12, 1, u16> event_allocated;
};
};
};
// All views must fit in exactly one 32-bit word.
static_assert(sizeof(SyncpointEventValue) == sizeof(u32));
private:
/// Bookkeeping for one NV event slot of nvhost_ctrl.
struct InternalEvent {
    // Kernel event backing this slot; created in CreateNvEvent and released (reset to
    // nullptr) in FreeNvEvent.
    Kernel::KEvent* kevent{};
    // Current state of the slot; read and written atomically.
    std::atomic<EventState> status{};
    // Failure counter: incremented by IocCtrlClearEventWait, reset when the slot is created.
    u32 fails{};
    // Syncpoint this slot is associated with; used to match slots against requested
    // syncpoint ids.
    u32 assigned_syncpt{};
    // Syncpoint value the slot is waiting for.
    // NOTE(review): not read anywhere in the visible code - confirm against the wait path.
    u32 assigned_value{};
    // True between CreateNvEvent and FreeNvEvent.
    bool registered{};
    // Handle of the pending host action; used to deregister it when a wait is cancelled.
    Tegra::Host1x::SyncpointManager::ActionHandle wait_handle{};

    /// A slot counts as "in use" while it is waiting, being cancelled or being signalled.
    bool IsBeingUsed() const {
        switch (status.load(std::memory_order_acquire)) {
        case EventState::Waiting:
        case EventState::Cancelling:
        case EventState::Signalling:
            return true;
        default:
            return false;
        }
    }
};
std::unique_lock<std::mutex> NvEventsLock();
void CreateNvEvent(u32 event_id);
void FreeNvEvent(u32 event_id);
u32 FindFreeNvEvent(u32 syncpoint_id);
std::array<InternalEvent, MaxNvEvents> events{};
std::mutex events_mutex;
u64 events_mask{};
struct IocSyncptReadParams {
u32_le id{};
u32_le value{};
@@ -83,27 +154,18 @@ private:
};
static_assert(sizeof(IocGetConfigParams) == 387, "IocGetConfigParams is incorrect size");
struct IocCtrlEventSignalParams {
u32_le event_id{};
struct IocCtrlEventClearParams {
SyncpointEventValue event_id{};
};
static_assert(sizeof(IocCtrlEventSignalParams) == 4,
"IocCtrlEventSignalParams is incorrect size");
static_assert(sizeof(IocCtrlEventClearParams) == 4,
"IocCtrlEventClearParams is incorrect size");
struct IocCtrlEventWaitParams {
u32_le syncpt_id{};
u32_le threshold{};
s32_le timeout{};
u32_le value{};
};
static_assert(sizeof(IocCtrlEventWaitParams) == 16, "IocCtrlEventWaitParams is incorrect size");
struct IocCtrlEventWaitAsyncParams {
u32_le syncpt_id{};
u32_le threshold{};
NvFence fence{};
u32_le timeout{};
u32_le value{};
SyncpointEventValue value{};
};
static_assert(sizeof(IocCtrlEventWaitAsyncParams) == 16,
static_assert(sizeof(IocCtrlEventWaitParams) == 16,
"IocCtrlEventWaitAsyncParams is incorrect size");
struct IocCtrlEventRegisterParams {
@@ -118,19 +180,25 @@ private:
static_assert(sizeof(IocCtrlEventUnregisterParams) == 4,
"IocCtrlEventUnregisterParams is incorrect size");
struct IocCtrlEventKill {
struct IocCtrlEventUnregisterBatchParams {
u64_le user_events{};
};
static_assert(sizeof(IocCtrlEventKill) == 8, "IocCtrlEventKill is incorrect size");
// The batch-unregister ioctl payload is a single 64-bit mask; the diagnostic names the struct
// that is actually being checked.
static_assert(sizeof(IocCtrlEventUnregisterBatchParams) == 8,
              "IocCtrlEventUnregisterBatchParams is incorrect size");
NvResult NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async);
NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output,
bool is_allocation);
NvResult IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocCtrlEventUnregisterBatch(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);
NvResult FreeEvent(u32 slot);
EventInterface& events_interface;
SyncpointManager& syncpoint_manager;
NvCore::Container& core;
NvCore::SyncpointManager& syncpoint_manager;
};
} // namespace Service::Nvidia::Devices

View File

@@ -7,11 +7,19 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
#include "core/hle/service/nvdrv/nvdrv.h"
namespace Service::Nvidia::Devices {
nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system_) : nvdevice{system_} {}
nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default;
nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system_, EventInterface& events_interface_)
: nvdevice{system_}, events_interface{events_interface_} {
error_notifier_event = events_interface.CreateEvent("CtrlGpuErrorNotifier");
unknown_event = events_interface.CreateEvent("CtrlGpuUknownEvent");
}
nvhost_ctrl_gpu::~nvhost_ctrl_gpu() {
events_interface.FreeEvent(error_notifier_event);
events_interface.FreeEvent(unknown_event);
}
NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) {
@@ -286,4 +294,17 @@ NvResult nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u
return NvResult::Success;
}
/// Maps an event id of this device to its kernel event.
///
/// @param event_id 1 selects the error notifier event, 2 the "unknown" event.
/// @return The matching kernel event, or nullptr (after logging) for any other id.
Kernel::KEvent* nvhost_ctrl_gpu::QueryEvent(u32 event_id) {
    if (event_id == 1) {
        return error_notifier_event;
    }
    if (event_id == 2) {
        return unknown_event;
    }

    LOG_CRITICAL(Service_NVDRV, "Unknown Ctrl GPU Event {}", event_id);
    return nullptr;
}
} // namespace Service::Nvidia::Devices

View File

@@ -10,11 +10,15 @@
#include "common/swap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
namespace Service::Nvidia {
class EventInterface;
}
namespace Service::Nvidia::Devices {
class nvhost_ctrl_gpu final : public nvdevice {
public:
explicit nvhost_ctrl_gpu(Core::System& system_);
explicit nvhost_ctrl_gpu(Core::System& system_, EventInterface& events_interface_);
~nvhost_ctrl_gpu() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -27,6 +31,8 @@ public:
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
Kernel::KEvent* QueryEvent(u32 event_id) override;
private:
struct IoctlGpuCharacteristics {
u32_le arch; // 0x120 (NVGPU_GPU_ARCH_GM200)
@@ -160,6 +166,12 @@ private:
NvResult ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output);
NvResult FlushL2(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetGpuTime(const std::vector<u8>& input, std::vector<u8>& output);
EventInterface& events_interface;
// Events
Kernel::KEvent* error_notifier_event;
Kernel::KEvent* unknown_event;
};
} // namespace Service::Nvidia::Devices

View File

@@ -5,29 +5,46 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "core/memory.h"
#include "video_core/control/channel_state.h"
#include "video_core/engines/puller.h"
#include "video_core/gpu.h"
#include "video_core/host1x/host1x.h"
namespace Service::Nvidia::Devices {
namespace {
Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoint_id) {
Tegra::GPU::FenceAction result{};
Tegra::CommandHeader BuildFenceAction(Tegra::Engines::Puller::FenceOperation op, u32 syncpoint_id) {
Tegra::Engines::Puller::FenceAction result{};
result.op.Assign(op);
result.syncpoint_id.Assign(syncpoint_id);
return {result.raw};
}
} // namespace
nvhost_gpu::nvhost_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
SyncpointManager& syncpoint_manager_)
: nvdevice{system_}, nvmap_dev{std::move(nvmap_dev_)}, syncpoint_manager{syncpoint_manager_} {
channel_fence.id = syncpoint_manager_.AllocateSyncpoint();
channel_fence.value = system_.GPU().GetSyncpointValue(channel_fence.id);
nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_,
NvCore::Container& core_)
: nvdevice{system_}, events_interface{events_interface_}, core{core_},
syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
channel_state{system.GPU().AllocateChannel()} {
channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false);
sm_exception_breakpoint_int_report_event =
events_interface.CreateEvent("GpuChannelSMExceptionBreakpointInt");
sm_exception_breakpoint_pause_report_event =
events_interface.CreateEvent("GpuChannelSMExceptionBreakpointPause");
error_notifier_event = events_interface.CreateEvent("GpuChannelErrorNotifier");
}
nvhost_gpu::~nvhost_gpu() = default;
nvhost_gpu::~nvhost_gpu() {
events_interface.FreeEvent(sm_exception_breakpoint_int_report_event);
events_interface.FreeEvent(sm_exception_breakpoint_pause_report_event);
events_interface.FreeEvent(error_notifier_event);
syncpoint_manager.FreeSyncpoint(channel_syncpoint);
}
NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) {
@@ -167,9 +184,14 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8
params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
params.unk3);
channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
if (channel_state->initialized) {
LOG_CRITICAL(Service_NVDRV, "Already allocated!");
return NvResult::AlreadyAllocated;
}
params.fence_out = channel_fence;
system.GPU().InitChannel(*channel_state);
params.fence_out = syncpoint_manager.GetSyncpointFence(channel_syncpoint);
std::memcpy(output.data(), &params, output.size());
return NvResult::Success;
@@ -188,39 +210,37 @@ NvResult nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::ve
static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
return {
Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
Tegra::SubmissionMode::Increasing),
{fence.value},
Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
Tegra::SubmissionMode::Increasing),
BuildFenceAction(Tegra::GPU::FenceOperation::Acquire, fence.id),
BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id),
};
}
static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence,
u32 add_increment) {
static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence) {
std::vector<Tegra::CommandHeader> result{
Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
Tegra::SubmissionMode::Increasing),
{}};
for (u32 count = 0; count < add_increment; ++count) {
result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
for (u32 count = 0; count < 2; ++count) {
result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
Tegra::SubmissionMode::Increasing));
result.emplace_back(BuildFenceAction(Tegra::GPU::FenceOperation::Increment, fence.id));
result.emplace_back(
BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
}
return result;
}
static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence,
u32 add_increment) {
static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence) {
std::vector<Tegra::CommandHeader> result{
Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1,
Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1,
Tegra::SubmissionMode::Increasing),
{}};
const std::vector<Tegra::CommandHeader> increment{
BuildIncrementCommandList(fence, add_increment)};
const std::vector<Tegra::CommandHeader> increment{BuildIncrementCommandList(fence)};
result.insert(result.end(), increment.begin(), increment.end());
@@ -234,33 +254,41 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
auto& gpu = system.GPU();
params.fence_out.id = channel_fence.id;
std::scoped_lock lock(channel_mutex);
if (params.flags.add_wait.Value() &&
!syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
}
const auto bind_id = channel_state->bind_id;
if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0;
params.fence_out.value = syncpoint_manager.IncreaseSyncpoint(
params.fence_out.id, params.AddIncrementValue() + increment_value);
} else {
params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
}
auto& flags = params.flags;
gpu.PushGPUEntries(std::move(entries));
if (flags.fence_wait.Value()) {
if (flags.increment_value.Value()) {
return NvResult::BadParameter;
}
if (params.flags.add_increment.Value()) {
if (params.flags.suppress_wfi) {
gpu.PushGPUEntries(Tegra::CommandList{
BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
} else {
gpu.PushGPUEntries(Tegra::CommandList{
BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
if (!syncpoint_manager.IsFenceSignalled(params.fence)) {
gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildWaitCommandList(params.fence)});
}
}
params.fence.id = channel_syncpoint;
u32 increment{(flags.fence_increment.Value() != 0 ? 2 : 0) +
(flags.increment_value.Value() != 0 ? params.fence.value : 0)};
params.fence.value = syncpoint_manager.IncrementSyncpointMaxExt(channel_syncpoint, increment);
gpu.PushGPUEntries(bind_id, std::move(entries));
if (flags.fence_increment.Value()) {
if (flags.suppress_wfi.Value()) {
gpu.PushGPUEntries(bind_id,
Tegra::CommandList{BuildIncrementCommandList(params.fence)});
} else {
gpu.PushGPUEntries(bind_id,
Tegra::CommandList{BuildIncrementWithWfiCommandList(params.fence)});
}
}
flags.raw = 0;
std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
return NvResult::Success;
}
@@ -328,4 +356,19 @@ NvResult nvhost_gpu::ChannelSetTimeslice(const std::vector<u8>& input, std::vect
return NvResult::Success;
}
/// Maps an event id of this GPU channel device to its kernel event.
///
/// @param event_id 1 = SM exception breakpoint interrupt report,
///                 2 = SM exception breakpoint pause report,
///                 3 = error notifier.
/// @return The matching kernel event, or nullptr (after logging) for any other id.
Kernel::KEvent* nvhost_gpu::QueryEvent(u32 event_id) {
    switch (event_id) {
    case 1:
        return sm_exception_breakpoint_int_report_event;
    case 2:
        return sm_exception_breakpoint_pause_report_event;
    case 3:
        return error_notifier_event;
    default:
        // This is the channel device, not nvhost_ctrl_gpu; name it correctly so the two
        // devices can be told apart in the log.
        LOG_CRITICAL(Service_NVDRV, "Unknown GPU Channel Event {}", event_id);
        break;
    }
    return nullptr;
}
} // namespace Service::Nvidia::Devices

View File

@@ -13,17 +13,31 @@
#include "core/hle/service/nvdrv/nvdata.h"
#include "video_core/dma_pusher.h"
namespace Service::Nvidia {
class SyncpointManager;
namespace Tegra {
namespace Control {
struct ChannelState;
}
} // namespace Tegra
namespace Service::Nvidia {
namespace NvCore {
class Container;
class NvMap;
class SyncpointManager;
} // namespace NvCore
class EventInterface;
} // namespace Service::Nvidia
namespace Service::Nvidia::Devices {
class nvhost_as_gpu;
class nvmap;
class nvhost_gpu final : public nvdevice {
public:
explicit nvhost_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
SyncpointManager& syncpoint_manager_);
explicit nvhost_gpu(Core::System& system_, EventInterface& events_interface_,
NvCore::Container& core);
~nvhost_gpu() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -36,7 +50,10 @@ public:
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
Kernel::KEvent* QueryEvent(u32 event_id) override;
private:
friend class nvhost_as_gpu;
enum class CtxObjects : u32_le {
Ctx2D = 0x902D,
Ctx3D = 0xB197,
@@ -146,17 +163,13 @@ private:
u32_le num_entries{}; // number of fence objects being submitted
union {
u32_le raw;
BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list
BitField<1, 1, u32_le> add_increment; // append an increment to the list
BitField<2, 1, u32_le> new_hw_format; // mostly ignored
BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt
BitField<8, 1, u32_le> increment; // increment the returned fence
BitField<0, 1, u32_le> fence_wait; // append a wait sync_point to the list
BitField<1, 1, u32_le> fence_increment; // append an increment to the list
BitField<2, 1, u32_le> new_hw_format; // mostly ignored
BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt
BitField<8, 1, u32_le> increment_value; // increment the returned fence
} flags;
NvFence fence_out{}; // returned new fence object for others to wait on
u32 AddIncrementValue() const {
return flags.add_increment.Value() << 1;
}
NvFence fence{}; // returned new fence object for others to wait on
};
static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(NvFence),
"IoctlSubmitGpfifo is incorrect size");
@@ -191,9 +204,18 @@ private:
NvResult ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output);
NvResult ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
std::shared_ptr<nvmap> nvmap_dev;
SyncpointManager& syncpoint_manager;
NvFence channel_fence;
EventInterface& events_interface;
NvCore::Container& core;
NvCore::SyncpointManager& syncpoint_manager;
NvCore::NvMap& nvmap;
std::shared_ptr<Tegra::Control::ChannelState> channel_state;
u32 channel_syncpoint;
std::mutex channel_mutex;
// Events
Kernel::KEvent* sm_exception_breakpoint_int_report_event;
Kernel::KEvent* sm_exception_breakpoint_pause_report_event;
Kernel::KEvent* error_notifier_event;
};
} // namespace Service::Nvidia::Devices

View File

@@ -5,14 +5,14 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
#include "video_core/renderer_base.h"
namespace Service::Nvidia::Devices {
nvhost_nvdec::nvhost_nvdec(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
SyncpointManager& syncpoint_manager_)
: nvhost_nvdec_common{system_, std::move(nvmap_dev_), syncpoint_manager_} {}
nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core_)
: nvhost_nvdec_common{system_, core_, NvCore::ChannelType::NvDec} {}
nvhost_nvdec::~nvhost_nvdec() = default;
NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -21,8 +21,9 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>&
case 0x0:
switch (command.cmd) {
case 0x1: {
if (!fd_to_id.contains(fd)) {
fd_to_id[fd] = next_id++;
auto& host1x_file = core.Host1xDeviceFile();
if (!host1x_file.fd_to_id.contains(fd)) {
host1x_file.fd_to_id[fd] = host1x_file.nvdec_next_id++;
}
return Submit(fd, input, output);
}
@@ -73,8 +74,9 @@ void nvhost_nvdec::OnOpen(DeviceFD fd) {
void nvhost_nvdec::OnClose(DeviceFD fd) {
LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
const auto iter = fd_to_id.find(fd);
if (iter != fd_to_id.end()) {
auto& host1x_file = core.Host1xDeviceFile();
const auto iter = host1x_file.fd_to_id.find(fd);
if (iter != host1x_file.fd_to_id.end()) {
system.GPU().ClearCdmaInstance(iter->second);
}
system.AudioCore().SetNVDECActive(false);

View File

@@ -10,8 +10,7 @@ namespace Service::Nvidia::Devices {
class nvhost_nvdec final : public nvhost_nvdec_common {
public:
explicit nvhost_nvdec(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
SyncpointManager& syncpoint_manager_);
explicit nvhost_nvdec(Core::System& system_, NvCore::Container& core);
~nvhost_nvdec() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -23,9 +22,6 @@ public:
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
private:
u32 next_id{};
};
} // namespace Service::Nvidia::Devices

View File

@@ -8,10 +8,12 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/memory.h"
#include "video_core/host1x/host1x.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
@@ -44,10 +46,22 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s
}
} // Anonymous namespace
nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
SyncpointManager& syncpoint_manager_)
: nvdevice{system_}, nvmap_dev{std::move(nvmap_dev_)}, syncpoint_manager{syncpoint_manager_} {}
nvhost_nvdec_common::~nvhost_nvdec_common() = default;
/// Binds the device to the shared nvdrv core container and picks its channel syncpoint.
///
/// A syncpoint left behind in the host1x device file's `syncpts_accumulated` list is reused
/// (taken from the front) when one exists; otherwise a new one is allocated from the
/// syncpoint manager.
nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Container& core_,
                                         NvCore::ChannelType channel_type_)
    : nvdevice{system_}, core{core_}, syncpoint_manager{core.GetSyncpointManager()},
      nvmap{core.GetNvMapFile()}, channel_type{channel_type_} {
    auto& recycled_syncpts = core.Host1xDeviceFile().syncpts_accumulated;
    if (!recycled_syncpts.empty()) {
        channel_syncpoint = recycled_syncpts.front();
        recycled_syncpts.pop_front();
        return;
    }
    channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false);
}
// Instead of freeing the channel syncpoint, append it to the host1x device file's
// `syncpts_accumulated` list; the constructor takes syncpoints from the front of that list
// before allocating new ones.
nvhost_nvdec_common::~nvhost_nvdec_common() {
core.Host1xDeviceFile().syncpts_accumulated.push_back(channel_syncpoint);
}
NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
IoctlSetNvmapFD params{};
@@ -84,16 +98,16 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector<u8>& input,
for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
const SyncptIncr& syncpt_incr = syncpt_increments[i];
fence_thresholds[i] =
syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments);
syncpoint_manager.IncrementSyncpointMaxExt(syncpt_incr.id, syncpt_incr.increments);
}
}
for (const auto& cmd_buffer : command_buffers) {
const auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
const auto object = nvmap.GetHandle(cmd_buffer.memory_id);
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
system.Memory().ReadBlock(object->addr + cmd_buffer.offset, cmdlist.data(),
system.Memory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
cmdlist.size() * sizeof(u32));
gpu.PushCommandBuffer(fd_to_id[fd], cmdlist);
gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
}
std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
// Some games expect command_buffers to be written back
@@ -112,10 +126,8 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve
std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
if (device_syncpoints[params.param] == 0 && system.GPU().UseNvdec()) {
device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint();
}
params.value = device_syncpoints[params.param];
// const u32 id{NvCore::SyncpointManager::channel_syncpoints[static_cast<u32>(channel_type)]};
params.value = channel_syncpoint;
std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
return NvResult::Success;
@@ -123,6 +135,7 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve
NvResult nvhost_nvdec_common::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetWaitbase params{};
LOG_CRITICAL(Service_NVDRV, "called WAITBASE");
std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
params.value = 0; // Seems to be hard coded at 0
std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase));
@@ -136,28 +149,8 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
auto& gpu = system.GPU();
for (auto& cmd_buffer : cmd_buffer_handles) {
auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)};
if (!object) {
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle);
std::memcpy(output.data(), &params, output.size());
return NvResult::InvalidState;
}
if (object->dma_map_addr == 0) {
// NVDEC and VIC memory is in the 32-bit address space
// MapAllocate32 will attempt to map a lower 32-bit value in the shared gpu memory space
const GPUVAddr low_addr = gpu.MemoryManager().MapAllocate32(object->addr, object->size);
object->dma_map_addr = static_cast<u32>(low_addr);
// Ensure that the dma_map_addr is indeed in the lower 32-bit address space.
ASSERT(object->dma_map_addr == low_addr);
}
if (!object->dma_map_addr) {
LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size);
} else {
cmd_buffer.map_address = object->dma_map_addr;
}
cmd_buffer.map_address = nvmap.PinHandle(cmd_buffer.map_handle);
}
std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
std::memcpy(output.data() + sizeof(IoctlMapBuffer), cmd_buffer_handles.data(),
@@ -167,11 +160,16 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
}
/// Handles the unmap-buffer ioctl: unpins every nvmap handle listed in the request.
///
/// @param input  Serialized IoctlMapBuffer header followed by `num_entries` MapBufferEntry
///               records.
/// @param output Response buffer; it is zero-filled in full.
/// @return Always NvResult::Success.
NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
    IoctlMapBuffer params{};
    std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));

    // The entries follow the fixed-size header in the input buffer.
    std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
    SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));

    // Undo the pin taken when the buffer was mapped.
    for (const auto& cmd_buffer : cmd_buffer_handles) {
        nvmap.UnpinHandle(cmd_buffer.map_handle);
    }

    std::memset(output.data(), 0, output.size());
    LOG_DEBUG(Service_NVDRV, "called");
    return NvResult::Success;
}
@@ -182,4 +180,9 @@ NvResult nvhost_nvdec_common::SetSubmitTimeout(const std::vector<u8>& input,
return NvResult::Success;
}
// This device provides no events: every id is logged as unknown at critical level and
// nullptr is returned.
Kernel::KEvent* nvhost_nvdec_common::QueryEvent(u32 event_id) {
LOG_CRITICAL(Service_NVDRV, "Unknown HOSTX1 Event {}", event_id);
return nullptr;
}
} // namespace Service::Nvidia::Devices

View File

@@ -3,21 +3,26 @@
#pragma once
#include <deque>
#include <vector>
#include "common/common_types.h"
#include "common/swap.h"
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
namespace Service::Nvidia {
class SyncpointManager;
namespace NvCore {
class Container;
class NvMap;
} // namespace NvCore
namespace Devices {
class nvmap;
class nvhost_nvdec_common : public nvdevice {
public:
explicit nvhost_nvdec_common(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
SyncpointManager& syncpoint_manager_);
explicit nvhost_nvdec_common(Core::System& system_, NvCore::Container& core,
NvCore::ChannelType channel_type);
~nvhost_nvdec_common() override;
protected:
@@ -110,11 +115,15 @@ protected:
NvResult UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
NvResult SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output);
std::unordered_map<DeviceFD, u32> fd_to_id{};
Kernel::KEvent* QueryEvent(u32 event_id) override;
u32 channel_syncpoint;
s32_le nvmap_fd{};
u32_le submit_timeout{};
std::shared_ptr<nvmap> nvmap_dev;
SyncpointManager& syncpoint_manager;
NvCore::Container& core;
NvCore::SyncpointManager& syncpoint_manager;
NvCore::NvMap& nvmap;
NvCore::ChannelType channel_type;
std::array<u32, MaxSyncPoints> device_syncpoints{};
};
}; // namespace Devices

View File

@@ -4,13 +4,14 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/devices/nvhost_vic.h"
#include "video_core/renderer_base.h"
namespace Service::Nvidia::Devices {
nvhost_vic::nvhost_vic(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
SyncpointManager& syncpoint_manager_)
: nvhost_nvdec_common{system_, std::move(nvmap_dev_), syncpoint_manager_} {}
nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core_)
: nvhost_nvdec_common{system_, core_, NvCore::ChannelType::VIC} {}
nvhost_vic::~nvhost_vic() = default;
@@ -19,11 +20,13 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& i
switch (command.group) {
case 0x0:
switch (command.cmd) {
case 0x1:
if (!fd_to_id.contains(fd)) {
fd_to_id[fd] = next_id++;
case 0x1: {
auto& host1x_file = core.Host1xDeviceFile();
if (!host1x_file.fd_to_id.contains(fd)) {
host1x_file.fd_to_id[fd] = host1x_file.vic_next_id++;
}
return Submit(fd, input, output);
}
case 0x2:
return GetSyncpoint(input, output);
case 0x3:
@@ -67,8 +70,9 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& i
void nvhost_vic::OnOpen(DeviceFD fd) {}
void nvhost_vic::OnClose(DeviceFD fd) {
const auto iter = fd_to_id.find(fd);
if (iter != fd_to_id.end()) {
auto& host1x_file = core.Host1xDeviceFile();
const auto iter = host1x_file.fd_to_id.find(fd);
if (iter != host1x_file.fd_to_id.end()) {
system.GPU().ClearCdmaInstance(iter->second);
}
}

View File

@@ -9,8 +9,7 @@ namespace Service::Nvidia::Devices {
class nvhost_vic final : public nvhost_nvdec_common {
public:
explicit nvhost_vic(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
SyncpointManager& syncpoint_manager_);
explicit nvhost_vic(Core::System& system_, NvCore::Container& core);
~nvhost_vic();
NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -22,8 +21,5 @@ public:
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
private:
u32 next_id{};
};
} // namespace Service::Nvidia::Devices

View File

@@ -2,19 +2,26 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <bit>
#include <cstring>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
#include "core/memory.h"
using Core::Memory::YUZU_PAGESIZE;
namespace Service::Nvidia::Devices {
nvmap::nvmap(Core::System& system_) : nvdevice{system_} {
// Handle 0 appears to be used when remapping, so we create a placeholder empty nvmap object to
// represent this.
CreateObject(0);
}
/// Constructs the nvmap device on top of a shared NvCore container.
/// @param system_ System instance forwarded to the nvdevice base.
/// @param container_ Container that is stored by reference and queried for its NvMap file.
/// Note: `file` is initialized from the `container` member (not the parameter), so it relies
/// on `container` being initialized before `file`.
nvmap::nvmap(Core::System& system_, NvCore::Container& container_)
: nvdevice{system_}, container{container_}, file{container.GetNvMapFile()} {}
nvmap::~nvmap() = default;
@@ -62,39 +69,21 @@ NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
void nvmap::OnOpen(DeviceFD fd) {}
void nvmap::OnClose(DeviceFD fd) {}
/// Returns the address stored in the nvmap object referenced by a handle.
/// @param handle Handle passed to GetObject to look up the object.
/// @returns The `addr` field of the object.
VAddr nvmap::GetObjectAddress(u32 handle) const {
auto object = GetObject(handle);
// The handle must resolve to an object, and that object must be in the Allocated state.
// NOTE(review): if ASSERT does not halt execution, a missing object is dereferenced
// below -- confirm the macro's behavior.
ASSERT(object);
ASSERT(object->status == Object::Status::Allocated);
return object->addr;
}
/// Creates a new nvmap object of the requested size and registers it under a fresh handle.
/// @param size Size recorded in the new object.
/// @returns The handle under which the object was stored.
u32 nvmap::CreateObject(u32 size) {
    // Build the object first: it starts in the Created state holding a single reference.
    auto fresh = std::make_shared<Object>();
    fresh->id = next_id++;
    fresh->size = size;
    fresh->status = Object::Status::Created;
    fresh->refcount = 1;

    // Handles and ids come from separate counters.
    const u32 new_handle = next_handle++;
    handles.insert_or_assign(new_handle, std::move(fresh));
    return new_handle;
}
NvResult nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
IocCreateParams params;
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "size=0x{:08X}", params.size);
LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size);
if (!params.size) {
LOG_ERROR(Service_NVDRV, "Size is 0");
return NvResult::BadValue;
std::shared_ptr<NvCore::NvMap::Handle> handle_description{};
auto result =
file.CreateHandle(Common::AlignUp(params.size, YUZU_PAGESIZE), handle_description);
if (result != NvResult::Success) {
LOG_CRITICAL(Service_NVDRV, "Failed to create Object");
return result;
}
params.handle = CreateObject(params.size);
handle_description->orig_size = params.size; // Orig size is the unaligned size
params.handle = handle_description->id;
LOG_DEBUG(Service_NVDRV, "handle: {}, size: 0x{:X}", handle_description->id, params.size);
std::memcpy(output.data(), &params, sizeof(params));
return NvResult::Success;
@@ -103,63 +92,68 @@ NvResult nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output)
NvResult nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
IocAllocParams params;
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.addr);
LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address);
if (!params.handle) {
LOG_ERROR(Service_NVDRV, "Handle is 0");
LOG_CRITICAL(Service_NVDRV, "Handle is 0");
return NvResult::BadValue;
}
if ((params.align - 1) & params.align) {
LOG_ERROR(Service_NVDRV, "Incorrect alignment used, alignment={:08X}", params.align);
LOG_CRITICAL(Service_NVDRV, "Incorrect alignment used, alignment={:08X}", params.align);
return NvResult::BadValue;
}
const u32 min_alignment = 0x1000;
if (params.align < min_alignment) {
params.align = min_alignment;
// Force page size alignment at a minimum
if (params.align < YUZU_PAGESIZE) {
params.align = YUZU_PAGESIZE;
}
auto object = GetObject(params.handle);
if (!object) {
LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
auto handle_description{file.GetHandle(params.handle)};
if (!handle_description) {
LOG_CRITICAL(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
return NvResult::BadValue;
}
if (object->status == Object::Status::Allocated) {
LOG_ERROR(Service_NVDRV, "Object is already allocated, handle={:08X}", params.handle);
if (handle_description->allocated) {
LOG_CRITICAL(Service_NVDRV, "Object is already allocated, handle={:08X}", params.handle);
return NvResult::InsufficientMemory;
}
object->flags = params.flags;
object->align = params.align;
object->kind = params.kind;
object->addr = params.addr;
object->status = Object::Status::Allocated;
const auto result =
handle_description->Alloc(params.flags, params.align, params.kind, params.address);
if (result != NvResult::Success) {
LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle);
return result;
}
ASSERT(system.CurrentProcess()
->PageTable()
.LockForDeviceAddressSpace(handle_description->address, handle_description->size)
.IsSuccess());
std::memcpy(output.data(), &params, sizeof(params));
return NvResult::Success;
return result;
}
NvResult nvmap::IocGetId(const std::vector<u8>& input, std::vector<u8>& output) {
IocGetIdParams params;
std::memcpy(&params, input.data(), sizeof(params));
LOG_WARNING(Service_NVDRV, "called");
LOG_DEBUG(Service_NVDRV, "called");
// See the comment in FromId for extra info on this function
if (!params.handle) {
LOG_ERROR(Service_NVDRV, "Handle is zero");
LOG_CRITICAL(Service_NVDRV, "Error!");
return NvResult::BadValue;
}
auto object = GetObject(params.handle);
if (!object) {
LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
return NvResult::BadValue;
auto handle_description{file.GetHandle(params.handle)};
if (!handle_description) {
LOG_CRITICAL(Service_NVDRV, "Error!");
return NvResult::AccessDenied; // This will always return EPERM irrespective of if the
// handle exists or not
}
params.id = object->id;
params.id = handle_description->id;
std::memcpy(output.data(), &params, sizeof(params));
return NvResult::Success;
}
@@ -168,26 +162,29 @@ NvResult nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output)
IocFromIdParams params;
std::memcpy(&params, input.data(), sizeof(params));
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
LOG_DEBUG(Service_NVDRV, "called, id:{}", params.id);
auto itr = std::find_if(handles.begin(), handles.end(),
[&](const auto& entry) { return entry.second->id == params.id; });
if (itr == handles.end()) {
LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
// Handles and IDs are always the same value in nvmap however IDs can be used globally given the
// right permissions.
// Since we don't plan on ever supporting multiprocess we can skip implementing handle refs and
// so this function just does simple validation and passes through the handle id.
if (!params.id) {
LOG_CRITICAL(Service_NVDRV, "Zero Id is invalid!");
return NvResult::BadValue;
}
auto& object = itr->second;
if (object->status != Object::Status::Allocated) {
LOG_ERROR(Service_NVDRV, "Object is not allocated, handle={:08X}", params.handle);
auto handle_description{file.GetHandle(params.id)};
if (!handle_description) {
LOG_CRITICAL(Service_NVDRV, "Unregistered handle!");
return NvResult::BadValue;
}
itr->second->refcount++;
// Return the existing handle instead of creating a new one.
params.handle = itr->first;
auto result = handle_description->Duplicate(false);
if (result != NvResult::Success) {
LOG_CRITICAL(Service_NVDRV, "Could not duplicate handle!");
return result;
}
params.handle = handle_description->id;
std::memcpy(output.data(), &params, sizeof(params));
return NvResult::Success;
}
@@ -198,35 +195,43 @@ NvResult nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output)
IocParamParams params;
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "(STUBBED) called type={}", params.param);
LOG_DEBUG(Service_NVDRV, "called type={}", params.param);
auto object = GetObject(params.handle);
if (!object) {
LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
if (!params.handle) {
LOG_CRITICAL(Service_NVDRV, "Invalid handle!");
return NvResult::BadValue;
}
if (object->status != Object::Status::Allocated) {
LOG_ERROR(Service_NVDRV, "Object is not allocated, handle={:08X}", params.handle);
auto handle_description{file.GetHandle(params.handle)};
if (!handle_description) {
LOG_CRITICAL(Service_NVDRV, "Not registered handle!");
return NvResult::BadValue;
}
switch (static_cast<ParamTypes>(params.param)) {
case ParamTypes::Size:
params.result = object->size;
switch (params.param) {
case HandleParameterType::Size:
params.result = static_cast<u32_le>(handle_description->orig_size);
break;
case ParamTypes::Alignment:
params.result = object->align;
case HandleParameterType::Alignment:
params.result = static_cast<u32_le>(handle_description->align);
break;
case ParamTypes::Heap:
// TODO(Subv): Seems to be a hardcoded value?
params.result = 0x40000000;
case HandleParameterType::Base:
params.result = static_cast<u32_le>(-22); // posix EINVAL
break;
case ParamTypes::Kind:
params.result = object->kind;
case HandleParameterType::Heap:
if (handle_description->allocated)
params.result = 0x40000000;
else
params.result = 0;
break;
case HandleParameterType::Kind:
params.result = handle_description->kind;
break;
case HandleParameterType::IsSharedMemMapped:
params.result = handle_description->is_shared_mem_mapped;
break;
default:
UNIMPLEMENTED();
return NvResult::BadValue;
}
std::memcpy(output.data(), &params, sizeof(params));
@@ -234,46 +239,29 @@ NvResult nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output)
}
NvResult nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
// TODO(Subv): These flags are unconfirmed.
enum FreeFlags {
Freed = 0,
NotFreedYet = 1,
};
IocFreeParams params;
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "(STUBBED) called");
LOG_DEBUG(Service_NVDRV, "called");
auto itr = handles.find(params.handle);
if (itr == handles.end()) {
LOG_ERROR(Service_NVDRV, "Object does not exist, handle={:08X}", params.handle);
return NvResult::BadValue;
}
if (!itr->second->refcount) {
LOG_ERROR(
Service_NVDRV,
"There is no references to this object. The object is already freed. handle={:08X}",
params.handle);
return NvResult::BadValue;
if (!params.handle) {
LOG_CRITICAL(Service_NVDRV, "Handle null freed?");
return NvResult::Success;
}
itr->second->refcount--;
params.size = itr->second->size;
if (itr->second->refcount == 0) {
params.flags = Freed;
// The address of the nvmap is written to the output if we're finally freeing it, otherwise
// 0 is written.
params.address = itr->second->addr;
if (auto freeInfo{file.FreeHandle(params.handle, false)}) {
ASSERT(system.CurrentProcess()
->PageTable()
.UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size)
.IsSuccess());
params.address = freeInfo->address;
params.size = static_cast<u32>(freeInfo->size);
params.flags.raw = 0;
params.flags.map_uncached.Assign(freeInfo->was_uncached);
} else {
params.flags = NotFreedYet;
params.address = 0;
// This is possible when there are internal dups or other duplicates.
}
handles.erase(params.handle);
std::memcpy(output.data(), &params, sizeof(params));
return NvResult::Success;
}

View File

@@ -9,15 +9,23 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/swap.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
namespace Service::Nvidia::NvCore {
class Container;
} // namespace Service::Nvidia::NvCore
namespace Service::Nvidia::Devices {
class nvmap final : public nvdevice {
public:
explicit nvmap(Core::System& system_);
explicit nvmap(Core::System& system_, NvCore::Container& container);
~nvmap() override;
nvmap(const nvmap&) = delete;
nvmap& operator=(const nvmap&) = delete;
NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -28,31 +36,15 @@ public:
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
/// Returns the allocated address of an nvmap object given its handle.
VAddr GetObjectAddress(u32 handle) const;
/// Represents an nvmap object.
struct Object {
enum class Status { Created, Allocated };
u32 id;
u32 size;
u32 flags;
u32 align;
u8 kind;
VAddr addr;
Status status;
u32 refcount;
u32 dma_map_addr;
enum class HandleParameterType : u32_le {
Size = 1,
Alignment = 2,
Base = 3,
Heap = 4,
Kind = 5,
IsSharedMemMapped = 6
};
/// Looks up the nvmap object registered under a handle.
/// @param handle Key searched for in `handles`.
/// @returns The stored object, or an empty pointer when the handle is not present.
std::shared_ptr<Object> GetObject(u32 handle) const {
    if (const auto found = handles.find(handle); found != handles.end()) {
        return found->second;
    }
    return {};
}
private:
/// Id to use for the next handle that is created.
u32 next_handle = 0;
@@ -60,9 +52,6 @@ private:
/// Id to use for the next object that is created.
u32 next_id = 0;
/// Mapping of currently allocated handles to the objects they represent.
std::unordered_map<u32, std::shared_ptr<Object>> handles;
struct IocCreateParams {
// Input
u32_le size{};
@@ -83,11 +72,11 @@ private:
// Input
u32_le handle{};
u32_le heap_mask{};
u32_le flags{};
NvCore::NvMap::Handle::Flags flags{};
u32_le align{};
u8 kind{};
INSERT_PADDING_BYTES(7);
u64_le addr{};
u64_le address{};
};
static_assert(sizeof(IocAllocParams) == 32, "IocAllocParams has wrong size");
@@ -96,14 +85,14 @@ private:
INSERT_PADDING_BYTES(4);
u64_le address{};
u32_le size{};
u32_le flags{};
NvCore::NvMap::Handle::Flags flags{};
};
static_assert(sizeof(IocFreeParams) == 24, "IocFreeParams has wrong size");
struct IocParamParams {
// Input
u32_le handle{};
u32_le param{};
HandleParameterType param{};
// Output
u32_le result{};
};
@@ -117,14 +106,15 @@ private:
};
static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");
u32 CreateObject(u32 size);
NvResult IocCreate(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocAlloc(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocGetId(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocFromId(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocParam(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocFree(const std::vector<u8>& input, std::vector<u8>& output);
NvCore::Container& container;
NvCore::NvMap& file;
};
} // namespace Service::Nvidia::Devices

View File

@@ -1,5 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
@@ -78,11 +79,15 @@ enum class NvResult : u32 {
ModuleNotPresent = 0xA000E,
};
// obtained from
// https://github.com/skyline-emu/skyline/blob/nvdec-dev/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.h#L47
enum class EventState {
Free = 0,
Registered = 1,
Waiting = 2,
Busy = 3,
Available = 0,
Waiting = 1,
Cancelling = 2,
Signalling = 3,
Signalled = 4,
Cancelled = 5,
};
union Ioctl {

View File

@@ -1,5 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#include <utility>
@@ -8,6 +9,7 @@
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/k_event.h"
#include "core/hle/kernel/k_writable_event.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
@@ -15,17 +17,31 @@
#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
#include "core/hle/service/nvdrv/devices/nvhost_nvjpg.h"
#include "core/hle/service/nvdrv/devices/nvhost_vic.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "core/hle/service/nvdrv/nvdrv_interface.h"
#include "core/hle/service/nvdrv/nvmemp.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/hle/service/nvflinger/nvflinger.h"
#include "video_core/gpu.h"
namespace Service::Nvidia {
EventInterface::EventInterface(Module& module_) : module{module_}, guard{}, on_signal{} {}
EventInterface::~EventInterface() = default;
/// Creates a kernel event through the owning module's service context.
/// @param name Name given to the new event; moved into the service context call.
/// @returns The event returned by the service context.
Kernel::KEvent* EventInterface::CreateEvent(std::string name) {
    return module.service_context.CreateEvent(std::move(name));
}
/// Closes an event through the owning module's service context.
/// @param event Event handed to ServiceContext::CloseEvent.
void EventInterface::FreeEvent(Kernel::KEvent* event) {
module.service_context.CloseEvent(event);
}
void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger,
Core::System& system) {
auto module_ = std::make_shared<Module>(system);
@@ -38,34 +54,54 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
}
Module::Module(Core::System& system)
: syncpoint_manager{system.GPU()}, service_context{system, "nvdrv"} {
for (u32 i = 0; i < MaxNvEvents; i++) {
events_interface.events[i].event =
service_context.CreateEvent(fmt::format("NVDRV::NvEvent_{}", i));
events_interface.status[i] = EventState::Free;
events_interface.registered[i] = false;
}
auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
devices["/dev/nvhost-gpu"] =
std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager);
devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
devices["/dev/nvmap"] = nvmap_dev;
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
devices["/dev/nvhost-ctrl"] =
std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
devices["/dev/nvhost-nvdec"] =
std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev, syncpoint_manager);
devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
devices["/dev/nvhost-vic"] =
std::make_shared<Devices::nvhost_vic>(system, nvmap_dev, syncpoint_manager);
: service_context{system, "nvdrv"}, events_interface{*this}, container{system.Host1x()} {
builders["/dev/nvhost-as-gpu"] = [this, &system](DeviceFD fd) {
std::shared_ptr<Devices::nvdevice> device =
std::make_shared<Devices::nvhost_as_gpu>(system, *this, container);
return open_files.emplace(fd, device).first;
};
builders["/dev/nvhost-gpu"] = [this, &system](DeviceFD fd) {
std::shared_ptr<Devices::nvdevice> device =
std::make_shared<Devices::nvhost_gpu>(system, events_interface, container);
return open_files.emplace(fd, device).first;
};
builders["/dev/nvhost-ctrl-gpu"] = [this, &system](DeviceFD fd) {
std::shared_ptr<Devices::nvdevice> device =
std::make_shared<Devices::nvhost_ctrl_gpu>(system, events_interface);
return open_files.emplace(fd, device).first;
};
builders["/dev/nvmap"] = [this, &system](DeviceFD fd) {
std::shared_ptr<Devices::nvdevice> device =
std::make_shared<Devices::nvmap>(system, container);
return open_files.emplace(fd, device).first;
};
builders["/dev/nvdisp_disp0"] = [this, &system](DeviceFD fd) {
std::shared_ptr<Devices::nvdevice> device =
std::make_shared<Devices::nvdisp_disp0>(system, container);
return open_files.emplace(fd, device).first;
};
builders["/dev/nvhost-ctrl"] = [this, &system](DeviceFD fd) {
std::shared_ptr<Devices::nvdevice> device =
std::make_shared<Devices::nvhost_ctrl>(system, events_interface, container);
return open_files.emplace(fd, device).first;
};
builders["/dev/nvhost-nvdec"] = [this, &system](DeviceFD fd) {
std::shared_ptr<Devices::nvdevice> device =
std::make_shared<Devices::nvhost_nvdec>(system, container);
return open_files.emplace(fd, device).first;
};
builders["/dev/nvhost-nvjpg"] = [this, &system](DeviceFD fd) {
std::shared_ptr<Devices::nvdevice> device = std::make_shared<Devices::nvhost_nvjpg>(system);
return open_files.emplace(fd, device).first;
};
builders["/dev/nvhost-vic"] = [this, &system](DeviceFD fd) {
std::shared_ptr<Devices::nvdevice> device =
std::make_shared<Devices::nvhost_vic>(system, container);
return open_files.emplace(fd, device).first;
};
}
Module::~Module() {
for (u32 i = 0; i < MaxNvEvents; i++) {
service_context.CloseEvent(events_interface.events[i].event);
}
}
Module::~Module() {}
NvResult Module::VerifyFD(DeviceFD fd) const {
if (fd < 0) {
@@ -82,18 +118,18 @@ NvResult Module::VerifyFD(DeviceFD fd) const {
}
DeviceFD Module::Open(const std::string& device_name) {
if (devices.find(device_name) == devices.end()) {
auto it = builders.find(device_name);
if (it == builders.end()) {
LOG_ERROR(Service_NVDRV, "Trying to open unknown device {}", device_name);
return INVALID_NVDRV_FD;
}
auto device = devices[device_name];
const DeviceFD fd = next_fd++;
auto& builder = it->second;
auto device = builder(fd)->second;
device->OnOpen(fd);
open_files[fd] = std::move(device);
return fd;
}
@@ -168,22 +204,24 @@ NvResult Module::Close(DeviceFD fd) {
return NvResult::Success;
}
void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
for (u32 i = 0; i < MaxNvEvents; i++) {
if (events_interface.assigned_syncpt[i] == syncpoint_id &&
events_interface.assigned_value[i] == value) {
events_interface.LiberateEvent(i);
events_interface.events[i].event->GetWritableEvent().Signal();
}
NvResult Module::QueryEvent(DeviceFD fd, u32 event_id, Kernel::KEvent*& event) {
if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState;
}
}
Kernel::KReadableEvent& Module::GetEvent(const u32 event_id) {
return events_interface.events[event_id].event->GetReadableEvent();
}
const auto itr = open_files.find(fd);
Kernel::KWritableEvent& Module::GetEventWriteable(const u32 event_id) {
return events_interface.events[event_id].event->GetWritableEvent();
if (itr == open_files.end()) {
LOG_ERROR(Service_NVDRV, "Could not find DeviceFD={}!", fd);
return NvResult::NotImplemented;
}
event = itr->second->QueryEvent(event_id);
if (!event) {
return NvResult::BadParameter;
}
return NvResult::Success;
}
} // namespace Service::Nvidia

View File

@@ -1,16 +1,20 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <functional>
#include <list>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/hle/service/nvflinger/ui/fence.h"
#include "core/hle/service/service.h"
@@ -28,81 +32,31 @@ class NVFlinger;
namespace Service::Nvidia {
namespace NvCore {
class Container;
class SyncpointManager;
} // namespace NvCore
namespace Devices {
class nvdevice;
}
class nvhost_ctrl;
} // namespace Devices
/// Represents an Nvidia event
struct NvEvent {
Kernel::KEvent* event{};
NvFence fence{};
};
class Module;
struct EventInterface {
// Mask representing currently busy events
u64 events_mask{};
// Each kernel event associated to an NV event
std::array<NvEvent, MaxNvEvents> events;
// The status of the current NVEvent
std::array<EventState, MaxNvEvents> status{};
// Tells if an NVEvent is registered or not
std::array<bool, MaxNvEvents> registered{};
// Tells the NVEvent that it has failed.
std::array<bool, MaxNvEvents> failed{};
// When an NVEvent is waiting on GPU interrupt, this is the sync_point
// associated with it.
std::array<u32, MaxNvEvents> assigned_syncpt{};
// This is the value of the GPU interrupt for which the NVEvent is waiting
// for.
std::array<u32, MaxNvEvents> assigned_value{};
// Constant to denote an unassigned syncpoint.
static constexpr u32 unassigned_syncpt = 0xFFFFFFFF;
std::optional<u32> GetFreeEvent() const {
u64 mask = events_mask;
for (u32 i = 0; i < MaxNvEvents; i++) {
const bool is_free = (mask & 0x1) == 0;
if (is_free) {
if (status[i] == EventState::Registered || status[i] == EventState::Free) {
return {i};
}
}
mask = mask >> 1;
}
return std::nullopt;
}
void SetEventStatus(const u32 event_id, EventState new_status) {
EventState old_status = status[event_id];
if (old_status == new_status) {
return;
}
status[event_id] = new_status;
if (new_status == EventState::Registered) {
registered[event_id] = true;
}
if (new_status == EventState::Waiting || new_status == EventState::Busy) {
events_mask |= (1ULL << event_id);
}
}
void RegisterEvent(const u32 event_id) {
registered[event_id] = true;
if (status[event_id] == EventState::Free) {
status[event_id] = EventState::Registered;
}
}
void UnregisterEvent(const u32 event_id) {
registered[event_id] = false;
if (status[event_id] == EventState::Registered) {
status[event_id] = EventState::Free;
}
}
void LiberateEvent(const u32 event_id) {
status[event_id] = registered[event_id] ? EventState::Registered : EventState::Free;
events_mask &= ~(1ULL << event_id);
assigned_syncpt[event_id] = unassigned_syncpt;
assigned_value[event_id] = 0;
}
class EventInterface {
public:
explicit EventInterface(Module& module_);
~EventInterface();
Kernel::KEvent* CreateEvent(std::string name);
void FreeEvent(Kernel::KEvent* event);
private:
Module& module;
std::mutex guard;
std::list<Devices::nvhost_ctrl*> on_signal;
};
class Module final {
@@ -112,9 +66,9 @@ public:
/// Returns a pointer to one of the available devices, identified by its name.
template <typename T>
std::shared_ptr<T> GetDevice(const std::string& name) {
auto itr = devices.find(name);
if (itr == devices.end())
std::shared_ptr<T> GetDevice(DeviceFD fd) {
auto itr = open_files.find(fd);
if (itr == open_files.end())
return nullptr;
return std::static_pointer_cast<T>(itr->second);
}
@@ -137,28 +91,27 @@ public:
/// Closes a device file descriptor and returns operation success.
NvResult Close(DeviceFD fd);
void SignalSyncpt(const u32 syncpoint_id, const u32 value);
Kernel::KReadableEvent& GetEvent(u32 event_id);
Kernel::KWritableEvent& GetEventWriteable(u32 event_id);
NvResult QueryEvent(DeviceFD fd, u32 event_id, Kernel::KEvent*& event);
private:
/// Manages syncpoints on the host
SyncpointManager syncpoint_manager;
friend class EventInterface;
friend class Service::NVFlinger::NVFlinger;
/// Id to use for the next open file descriptor.
DeviceFD next_fd = 1;
using FilesContainerType = std::unordered_map<DeviceFD, std::shared_ptr<Devices::nvdevice>>;
/// Mapping of file descriptors to the devices they reference.
std::unordered_map<DeviceFD, std::shared_ptr<Devices::nvdevice>> open_files;
FilesContainerType open_files;
/// Mapping of device node names to their implementation.
std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices;
KernelHelpers::ServiceContext service_context;
EventInterface events_interface;
KernelHelpers::ServiceContext service_context;
/// Manages syncpoints on the host
NvCore::Container container;
std::unordered_map<std::string, std::function<FilesContainerType::iterator(DeviceFD)>> builders;
};
/// Registers all NVDRV services with the specified service manager.

View File

@@ -1,10 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
#include <cinttypes>
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/k_event.h"
#include "core/hle/kernel/k_readable_event.h"
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/hle/service/nvdrv/nvdrv.h"
@@ -12,10 +14,6 @@
namespace Service::Nvidia {
/// Forwards a syncpoint signal to the nvdrv module.
/// @param syncpoint_id Syncpoint identifier passed through to Module::SignalSyncpt.
/// @param value Syncpoint value passed through to Module::SignalSyncpt.
void NVDRV::SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
nvdrv->SignalSyncpt(syncpoint_id, value);
}
void NVDRV::Open(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_NVDRV, "called");
IPC::ResponseBuilder rb{ctx, 4};
@@ -164,8 +162,7 @@ void NVDRV::Initialize(Kernel::HLERequestContext& ctx) {
void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto fd = rp.Pop<DeviceFD>();
const auto event_id = rp.Pop<u32>() & 0x00FF;
LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id);
const auto event_id = rp.Pop<u32>();
if (!is_initialized) {
ServiceError(ctx, NvResult::NotInitialized);
@@ -173,24 +170,20 @@ void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
return;
}
const auto nv_result = nvdrv->VerifyFD(fd);
if (nv_result != NvResult::Success) {
LOG_ERROR(Service_NVDRV, "Invalid FD specified DeviceFD={}!", fd);
ServiceError(ctx, nv_result);
return;
}
Kernel::KEvent* event = nullptr;
NvResult result = nvdrv->QueryEvent(fd, event_id, event);
if (event_id < MaxNvEvents) {
if (result == NvResult::Success) {
IPC::ResponseBuilder rb{ctx, 3, 1};
rb.Push(ResultSuccess);
auto& event = nvdrv->GetEvent(event_id);
event.Clear();
rb.PushCopyObjects(event);
auto& readable_event = event->GetReadableEvent();
rb.PushCopyObjects(readable_event);
rb.PushEnum(NvResult::Success);
} else {
LOG_ERROR(Service_NVDRV, "Invalid event request!");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
rb.PushEnum(NvResult::BadParameter);
rb.PushEnum(result);
}
}

View File

@@ -18,8 +18,6 @@ public:
explicit NVDRV(Core::System& system_, std::shared_ptr<Module> nvdrv_, const char* name);
~NVDRV() override;
void SignalGPUInterruptSyncpt(u32 syncpoint_id, u32 value);
private:
void Open(Kernel::HLERequestContext& ctx);
void Ioctl1(Kernel::HLERequestContext& ctx);

View File

@@ -1,38 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "video_core/gpu.h"
namespace Service::Nvidia {
/// Binds the manager to the GPU it queries; only the reference is stored here.
SyncpointManager::SyncpointManager(Tegra::GPU& gpu_) : gpu{gpu_} {}
/// Defaulted destructor, defined out of line in this translation unit.
SyncpointManager::~SyncpointManager() = default;
/// Re-reads the current value of a syncpoint from the GPU and records it as the
/// syncpoint's lower bound.
/// @param syncpoint_id Syncpoint ID to be refreshed (not range-checked on the write path).
/// @returns The lower bound as read back through GetSyncpointMin after the update.
u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) {
    const u32 gpu_value = gpu.GetSyncpointValue(syncpoint_id);
    // Plain store() uses the default ordering, exactly like assigning to the atomic.
    syncpoints[syncpoint_id].min.store(gpu_value);
    return GetSyncpointMin(syncpoint_id);
}
/// Reserves the lowest-numbered free syncpoint.
///
/// The search starts at ID 1, so syncpoint 0 is never handed out by this function.
/// @returns The ID of the newly reserved syncpoint. If every syncpoint is already
///          taken, an assertion fires and a value-initialized ID (0) is returned.
u32 SyncpointManager::AllocateSyncpoint() {
    for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) {
        // exchange() tests and claims the slot in a single atomic step. A separate
        // load followed by a store would let two concurrent callers both observe
        // "free" and walk away with the same syncpoint.
        if (!syncpoints[syncpoint_id].is_allocated.exchange(true)) {
            return syncpoint_id;
        }
    }
    ASSERT_MSG(false, "No more available syncpoints!");
    return {};
}
/// Raises the upper bound of a syncpoint.
/// @param syncpoint_id Syncpoint ID to be increased (not range-checked on the write path).
/// @param value Amount to add to the upper bound; unsigned wrap-around applies.
/// @returns The upper bound as read back through GetSyncpointMax after the addition.
u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) {
    // One fetch_add(value) leaves the counter in the same final state as `value`
    // separate increments, without issuing `value` atomic read-modify-writes.
    syncpoints[syncpoint_id].max.fetch_add(value, std::memory_order_relaxed);
    return GetSyncpointMax(syncpoint_id);
}
} // namespace Service::Nvidia

View File

@@ -1,84 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <atomic>
#include "common/common_types.h"
#include "core/hle/service/nvdrv/nvdata.h"
namespace Tegra {
class GPU;
}
namespace Service::Nvidia {
class SyncpointManager final {
public:
explicit SyncpointManager(Tegra::GPU& gpu_);
~SyncpointManager();
/**
* Returns true if the specified syncpoint is expired for the given value.
* @param syncpoint_id Syncpoint ID to check.
* @param value Value to check against the specified syncpoint.
* @returns True if the specified syncpoint is expired for the given value, otherwise False.
*/
bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const {
return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value);
}
/**
* Gets the lower bound for the specified syncpoint.
* @param syncpoint_id Syncpoint ID to get the lower bound for.
* @returns The lower bound for the specified syncpoint.
*/
u32 GetSyncpointMin(u32 syncpoint_id) const {
return syncpoints.at(syncpoint_id).min.load(std::memory_order_relaxed);
}
/**
* Gets the uper bound for the specified syncpoint.
* @param syncpoint_id Syncpoint ID to get the upper bound for.
* @returns The upper bound for the specified syncpoint.
*/
u32 GetSyncpointMax(u32 syncpoint_id) const {
return syncpoints.at(syncpoint_id).max.load(std::memory_order_relaxed);
}
/**
* Refreshes the minimum value for the specified syncpoint.
* @param syncpoint_id Syncpoint ID to be refreshed.
* @returns The new syncpoint minimum value.
*/
u32 RefreshSyncpoint(u32 syncpoint_id);
/**
* Allocates a new syncoint.
* @returns The syncpoint ID for the newly allocated syncpoint.
*/
u32 AllocateSyncpoint();
/**
* Increases the maximum value for the specified syncpoint.
* @param syncpoint_id Syncpoint ID to be increased.
* @param value Value to increase the specified syncpoint by.
* @returns The new syncpoint maximum value.
*/
u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value);
private:
struct Syncpoint {
std::atomic<u32> min;
std::atomic<u32> max;
std::atomic<bool> is_allocated;
};
std::array<Syncpoint, MaxSyncPoints> syncpoints{};
Tegra::GPU& gpu;
};
} // namespace Service::Nvidia

View File

@@ -5,15 +5,18 @@
// https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/gui/BufferQueueConsumer.cpp
#include "common/logging/log.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvflinger/buffer_item.h"
#include "core/hle/service/nvflinger/buffer_queue_consumer.h"
#include "core/hle/service/nvflinger/buffer_queue_core.h"
#include "core/hle/service/nvflinger/producer_listener.h"
#include "core/hle/service/nvflinger/ui/graphic_buffer.h"
namespace Service::android {
BufferQueueConsumer::BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_)
: core{std::move(core_)}, slots{core->slots} {}
BufferQueueConsumer::BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_,
Service::Nvidia::NvCore::NvMap& nvmap_)
: core{std::move(core_)}, slots{core->slots}, nvmap(nvmap_) {}
BufferQueueConsumer::~BufferQueueConsumer() = default;
@@ -133,6 +136,8 @@ Status BufferQueueConsumer::ReleaseBuffer(s32 slot, u64 frame_number, const Fenc
slots[slot].buffer_state = BufferState::Free;
nvmap.FreeHandle(slots[slot].graphic_buffer->BufferId(), true);
listener = core->connected_producer_listener;
LOG_DEBUG(Service_NVFlinger, "releasing slot {}", slot);

View File

@@ -13,6 +13,10 @@
#include "core/hle/service/nvflinger/buffer_queue_defs.h"
#include "core/hle/service/nvflinger/status.h"
namespace Service::Nvidia::NvCore {
class NvMap;
} // namespace Service::Nvidia::NvCore
namespace Service::android {
class BufferItem;
@@ -21,7 +25,8 @@ class IConsumerListener;
class BufferQueueConsumer final {
public:
explicit BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_);
explicit BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_,
Service::Nvidia::NvCore::NvMap& nvmap_);
~BufferQueueConsumer();
Status AcquireBuffer(BufferItem* out_buffer, std::chrono::nanoseconds expected_present);
@@ -32,6 +37,7 @@ public:
private:
std::shared_ptr<BufferQueueCore> core;
BufferQueueDefs::SlotsType& slots;
Service::Nvidia::NvCore::NvMap& nvmap;
};
} // namespace Service::android

View File

@@ -14,7 +14,7 @@
#include "core/hle/kernel/k_writable_event.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvflinger/buffer_queue_core.h"
#include "core/hle/service/nvflinger/buffer_queue_producer.h"
#include "core/hle/service/nvflinger/consumer_listener.h"
@@ -26,8 +26,10 @@
namespace Service::android {
BufferQueueProducer::BufferQueueProducer(Service::KernelHelpers::ServiceContext& service_context_,
std::shared_ptr<BufferQueueCore> buffer_queue_core_)
: service_context{service_context_}, core{std::move(buffer_queue_core_)}, slots(core->slots) {
std::shared_ptr<BufferQueueCore> buffer_queue_core_,
Service::Nvidia::NvCore::NvMap& nvmap_)
: service_context{service_context_}, core{std::move(buffer_queue_core_)}, slots(core->slots),
nvmap(nvmap_) {
buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent");
}
@@ -530,6 +532,8 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
item.is_droppable = core->dequeue_buffer_cannot_block || async;
item.swap_interval = swap_interval;
nvmap.DuplicateHandle(item.graphic_buffer->BufferId(), true);
sticky_transform = sticky_transform_;
if (core->queue.empty()) {

View File

@@ -31,6 +31,10 @@ namespace Service::KernelHelpers {
class ServiceContext;
} // namespace Service::KernelHelpers
namespace Service::Nvidia::NvCore {
class NvMap;
} // namespace Service::Nvidia::NvCore
namespace Service::android {
class BufferQueueCore;
@@ -39,7 +43,8 @@ class IProducerListener;
class BufferQueueProducer final : public IBinder {
public:
explicit BufferQueueProducer(Service::KernelHelpers::ServiceContext& service_context_,
std::shared_ptr<BufferQueueCore> buffer_queue_core_);
std::shared_ptr<BufferQueueCore> buffer_queue_core_,
Service::Nvidia::NvCore::NvMap& nvmap_);
~BufferQueueProducer();
void Transact(Kernel::HLERequestContext& ctx, android::TransactionId code, u32 flags) override;
@@ -78,6 +83,8 @@ private:
s32 next_callback_ticket{};
s32 current_callback_ticket{};
std::condition_variable_any callback_condition;
Service::Nvidia::NvCore::NvMap& nvmap;
};
} // namespace Service::android

View File

@@ -24,6 +24,8 @@
#include "core/hle/service/vi/layer/vi_layer.h"
#include "core/hle/service/vi/vi_results.h"
#include "video_core/gpu.h"
#include "video_core/host1x/host1x.h"
#include "video_core/host1x/syncpoint_manager.h"
namespace Service::NVFlinger {
@@ -31,7 +33,7 @@ constexpr auto frame_ns = std::chrono::nanoseconds{1000000000 / 60};
void NVFlinger::SplitVSync(std::stop_token stop_token) {
system.RegisterHostThread();
std::string name = "yuzu:VSyncThread";
std::string name = "VSyncThread";
MicroProfileOnThreadCreate(name.c_str());
// Cleanup
@@ -105,10 +107,15 @@ NVFlinger::~NVFlinger() {
display.GetLayer(layer).Core().NotifyShutdown();
}
}
if (nvdrv) {
nvdrv->Close(disp_fd);
}
}
/// Stores the shared nvdrv module and opens the "/dev/nvdisp_disp0" device through it,
/// keeping the value returned by Open() in disp_fd.
/// NOTE(review): nvdrv is dereferenced immediately after the move, so a null
/// instance would crash here — confirm callers never pass one.
void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
nvdrv = std::move(instance);
disp_fd = nvdrv->Open("/dev/nvdisp_disp0");
}
std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
@@ -142,7 +149,7 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
void NVFlinger::CreateLayerAtId(VI::Display& display, u64 layer_id) {
const auto buffer_id = next_buffer_queue_id++;
display.CreateLayer(layer_id, buffer_id);
display.CreateLayer(layer_id, buffer_id, nvdrv->container);
}
void NVFlinger::CloseLayer(u64 layer_id) {
@@ -262,30 +269,24 @@ void NVFlinger::Compose() {
return; // We are likely shutting down
}
auto& gpu = system.GPU();
const auto& multi_fence = buffer.fence;
guard->unlock();
for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
const auto& fence = multi_fence.fences[fence_id];
gpu.WaitFence(fence.id, fence.value);
}
guard->lock();
MicroProfileFlip();
// Now send the buffer to the GPU for drawing.
// TODO(Subv): Support more than just disp0. The display device selection is probably based
// on which display we're drawing (Default, Internal, External, etc)
auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");
auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd);
ASSERT(nvdisp);
guard->unlock();
Common::Rectangle<int> crop_rect{
static_cast<int>(buffer.crop.Left()), static_cast<int>(buffer.crop.Top()),
static_cast<int>(buffer.crop.Right()), static_cast<int>(buffer.crop.Bottom())};
nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(),
igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(),
static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect);
static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect,
buffer.fence.fences, buffer.fence.num_fences);
MicroProfileFlip();
guard->lock();
swap_interval = buffer.swap_interval;

View File

@@ -116,6 +116,7 @@ private:
void SplitVSync(std::stop_token stop_token);
std::shared_ptr<Nvidia::Module> nvdrv;
s32 disp_fd;
std::list<VI::Display> displays;

View File

@@ -12,6 +12,7 @@
#include "core/hle/kernel/k_readable_event.h"
#include "core/hle/kernel/k_writable_event.h"
#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvflinger/buffer_item_consumer.h"
#include "core/hle/service/nvflinger/buffer_queue_consumer.h"
#include "core/hle/service/nvflinger/buffer_queue_core.h"
@@ -29,11 +30,13 @@ struct BufferQueue {
std::unique_ptr<android::BufferQueueConsumer> consumer;
};
static BufferQueue CreateBufferQueue(KernelHelpers::ServiceContext& service_context) {
static BufferQueue CreateBufferQueue(KernelHelpers::ServiceContext& service_context,
Service::Nvidia::NvCore::NvMap& nvmap) {
auto buffer_queue_core = std::make_shared<android::BufferQueueCore>();
return {buffer_queue_core,
std::make_unique<android::BufferQueueProducer>(service_context, buffer_queue_core),
std::make_unique<android::BufferQueueConsumer>(buffer_queue_core)};
return {
buffer_queue_core,
std::make_unique<android::BufferQueueProducer>(service_context, buffer_queue_core, nvmap),
std::make_unique<android::BufferQueueConsumer>(buffer_queue_core, nvmap)};
}
Display::Display(u64 id, std::string name_,
@@ -74,10 +77,11 @@ void Display::SignalVSyncEvent() {
vsync_event->GetWritableEvent().Signal();
}
void Display::CreateLayer(u64 layer_id, u32 binder_id) {
void Display::CreateLayer(u64 layer_id, u32 binder_id,
Service::Nvidia::NvCore::Container& nv_core) {
ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
auto [core, producer, consumer] = CreateBufferQueue(service_context);
auto [core, producer, consumer] = CreateBufferQueue(service_context, nv_core.GetNvMapFile());
auto buffer_item_consumer = std::make_shared<android::BufferItemConsumer>(std::move(consumer));
buffer_item_consumer->Connect(false);

View File

@@ -27,6 +27,11 @@ namespace Service::NVFlinger {
class HosBinderDriverServer;
}
namespace Service::Nvidia::NvCore {
class Container;
class NvMap;
} // namespace Service::Nvidia::NvCore
namespace Service::VI {
class Layer;
@@ -93,7 +98,7 @@ public:
/// @param layer_id The ID to assign to the created layer.
/// @param binder_id The ID assigned to the buffer queue.
///
void CreateLayer(u64 layer_id, u32 binder_id);
void CreateLayer(u64 layer_id, u32 binder_id, Service::Nvidia::NvCore::Container& core);
/// Closes and removes a layer from this display with the given ID.
///

View File

@@ -58,6 +58,7 @@ static_assert(sizeof(DisplayInfo) == 0x60, "DisplayInfo has wrong size");
class NativeWindow final {
public:
constexpr explicit NativeWindow(u32 id_) : id{id_} {}
constexpr explicit NativeWindow(const NativeWindow& other) = default;
private:
const u32 magic = 2;

View File

@@ -551,6 +551,11 @@ struct Memory::Impl {
[]() {});
}
/// Resolves a virtual address through GetPointerImpl, passing two empty callbacks.
/// NOTE(review): the callbacks are presumably the failure/special-case hooks of
/// GetPointerImpl, which would make this the non-reporting variant of GetPointer —
/// confirm against GetPointerImpl's signature.
[[nodiscard]] u8* GetPointerSilent(const VAddr vaddr) const {
return GetPointerImpl(
vaddr, []() {}, []() {});
}
/**
* Reads a particular data type out of memory at the given virtual address.
*
@@ -686,6 +691,10 @@ u8* Memory::GetPointer(VAddr vaddr) {
return impl->GetPointer(vaddr);
}
/// Public entry point that delegates to Impl::GetPointerSilent for the given address.
u8* Memory::GetPointerSilent(VAddr vaddr) {
return impl->GetPointerSilent(vaddr);
}
/// Const overload: delegates to Impl::GetPointer and returns the result as a
/// pointer to const bytes.
const u8* Memory::GetPointer(VAddr vaddr) const {
return impl->GetPointer(vaddr);
}

View File

@@ -114,6 +114,7 @@ public:
* If the address is not valid, nullptr will be returned.
*/
u8* GetPointer(VAddr vaddr);
u8* GetPointerSilent(VAddr vaddr);
template <typename T>
T* GetPointer(VAddr vaddr) {

View File

@@ -23,5 +23,5 @@ endif()
target_link_libraries(yuzu-room PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads)
if(UNIX AND NOT APPLE)
install(TARGETS yuzu-room RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
install(TARGETS yuzu-room)
endif()

View File

@@ -90,7 +90,7 @@ GCAdapter::~GCAdapter() {
void GCAdapter::AdapterInputThread(std::stop_token stop_token) {
LOG_DEBUG(Input, "Input thread started");
Common::SetCurrentThreadName("yuzu:input:GCAdapter");
Common::SetCurrentThreadName("GCAdapter");
s32 payload_size{};
AdapterPayload adapter_payload{};
@@ -214,7 +214,7 @@ void GCAdapter::UpdateStateAxes(std::size_t port, const AdapterPayload& adapter_
}
void GCAdapter::AdapterScanThread(std::stop_token stop_token) {
Common::SetCurrentThreadName("yuzu:input:ScanGCAdapter");
Common::SetCurrentThreadName("ScanGCAdapter");
usb_adapter_handle = nullptr;
pads = {};
while (!stop_token.stop_requested() && !Setup()) {

View File

@@ -37,7 +37,7 @@ Mouse::Mouse(std::string input_engine_) : InputEngine(std::move(input_engine_))
}
void Mouse::UpdateThread(std::stop_token stop_token) {
Common::SetCurrentThreadName("yuzu:input:Mouse");
Common::SetCurrentThreadName("Mouse");
constexpr int update_time = 10;
while (!stop_token.stop_requested()) {
if (Settings::values.mouse_panning && !Settings::values.mouse_enabled) {

View File

@@ -436,7 +436,7 @@ SDLDriver::SDLDriver(std::string input_engine_) : InputEngine(std::move(input_en
initialized = true;
if (start_thread) {
poll_thread = std::thread([this] {
Common::SetCurrentThreadName("yuzu:input:SDL");
Common::SetCurrentThreadName("SDL_MainLoop");
using namespace std::chrono_literals;
while (initialized) {
SDL_PumpEvents();
@@ -444,7 +444,7 @@ SDLDriver::SDLDriver(std::string input_engine_) : InputEngine(std::move(input_en
}
});
vibration_thread = std::thread([this] {
Common::SetCurrentThreadName("yuzu:input:SDL_Vibration");
Common::SetCurrentThreadName("SDL_Vibration");
using namespace std::chrono_literals;
while (initialized) {
SendVibrations();

View File

@@ -13,9 +13,6 @@ namespace Shader::Backend::GLASM {
namespace {
void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
std::string_view size) {
if (!binding.IsImmediate()) {
throw NotImplementedException("Indirect constant buffer loading");
}
const Register ret{ctx.reg_alloc.Define(inst)};
if (offset.type == Type::U32) {
// Avoid reading arrays out of bounds, matching hardware's behavior
@@ -24,7 +21,27 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU
return;
}
}
ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset);
if (binding.IsImmediate()) {
ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset);
return;
}
const ScalarU32 idx{ctx.reg_alloc.Consume(binding)};
for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
ctx.Add("SEQ.S.CC RC.x,{},{};"
"IF NE.x;"
"LDC.{} {},c{}[{}];",
idx, i, size, ret, i, offset);
if (i != Info::MAX_INDIRECT_CBUFS - 1) {
ctx.Add("ELSE;");
}
}
for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
ctx.Add("ENDIF;");
}
}
bool IsInputArray(Stage stage) {

View File

@@ -964,9 +964,9 @@ private:
demote_endif_node.type = Type::EndIf;
demote_endif_node.data.end_if.merge = return_block_it->data.block;
asl.insert(return_block_it, demote_endif_node);
asl.insert(return_block_it, demote_node);
asl.insert(return_block_it, demote_if_node);
const auto next_it_1 = asl.insert(return_block_it, demote_endif_node);
const auto next_it_2 = asl.insert(next_it_1, demote_node);
asl.insert(next_it_2, demote_if_node);
}
ObjectPool<Statement>& stmt_pool;

View File

@@ -19,8 +19,10 @@ namespace {
struct ConstBufferAddr {
u32 index;
u32 offset;
u32 shift_left;
u32 secondary_index;
u32 secondary_offset;
u32 secondary_shift_left;
IR::U32 dynamic_offset;
u32 count;
bool has_secondary;
@@ -172,19 +174,41 @@ bool IsTextureInstruction(const IR::Inst& inst) {
return IndexedInstruction(inst) != IR::Opcode::Void;
}
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst);
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env);
std::optional<ConstBufferAddr> Track(const IR::Value& value) {
return IR::BreadthFirstSearch(value, TryGetConstBuffer);
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env) {
return IR::BreadthFirstSearch(
value, [&env](const IR::Inst* inst) { return TryGetConstBuffer(inst, env); });
}
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
/// Attempts to resolve a value to a compile-time constant read from a constant buffer.
///
/// The value's producing instruction must be a GetCbufU32 whose buffer index and
/// offset are both immediates, and the buffer index must be exactly 1.
/// NOTE(review): why only constant buffer 1 is accepted is not visible here — confirm.
/// @param value Value whose producing instruction is inspected.
/// @param env Environment used to read the constant buffer contents.
/// @returns The 32-bit word read from the constant buffer, or std::nullopt when
///          any of the conditions above does not hold.
std::optional<u32> TryGetConstant(IR::Value& value, Environment& env) {
    const IR::Inst* const producer = value.InstRecursive();
    if (producer->GetOpcode() != IR::Opcode::GetCbufU32) {
        return std::nullopt;
    }
    const IR::Value cbuf_index{producer->Arg(0)};
    const IR::Value cbuf_offset{producer->Arg(1)};
    if (!cbuf_index.IsImmediate() || !cbuf_offset.IsImmediate()) {
        return std::nullopt;
    }
    const u32 bank{cbuf_index.U32()};
    if (bank != 1) {
        return std::nullopt;
    }
    return env.ReadCbufValue(bank, cbuf_offset.U32());
}
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env) {
switch (inst->GetOpcode()) {
default:
return std::nullopt;
case IR::Opcode::BitwiseOr32: {
std::optional lhs{Track(inst->Arg(0))};
std::optional rhs{Track(inst->Arg(1))};
std::optional lhs{Track(inst->Arg(0), env)};
std::optional rhs{Track(inst->Arg(1), env)};
if (!lhs || !rhs) {
return std::nullopt;
}
@@ -194,19 +218,62 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
if (lhs->count > 1 || rhs->count > 1) {
return std::nullopt;
}
if (lhs->index > rhs->index || lhs->offset > rhs->offset) {
if (lhs->shift_left > 0 || lhs->index > rhs->index || lhs->offset > rhs->offset) {
std::swap(lhs, rhs);
}
return ConstBufferAddr{
.index = lhs->index,
.offset = lhs->offset,
.shift_left = lhs->shift_left,
.secondary_index = rhs->index,
.secondary_offset = rhs->offset,
.secondary_shift_left = rhs->shift_left,
.dynamic_offset = {},
.count = 1,
.has_secondary = true,
};
}
case IR::Opcode::ShiftLeftLogical32: {
const IR::Value shift{inst->Arg(1)};
if (!shift.IsImmediate()) {
return std::nullopt;
}
std::optional lhs{Track(inst->Arg(0), env)};
if (lhs) {
lhs->shift_left = shift.U32();
}
return lhs;
break;
}
case IR::Opcode::BitwiseAnd32: {
IR::Value op1{inst->Arg(0)};
IR::Value op2{inst->Arg(1)};
if (op1.IsImmediate()) {
std::swap(op1, op2);
}
if (!op2.IsImmediate() && !op1.IsImmediate()) {
do {
auto try_index = TryGetConstant(op1, env);
if (try_index) {
op1 = op2;
op2 = IR::Value{*try_index};
break;
}
auto try_index_2 = TryGetConstant(op2, env);
if (try_index_2) {
op2 = IR::Value{*try_index_2};
break;
}
return std::nullopt;
} while (false);
}
std::optional lhs{Track(op1, env)};
if (lhs) {
lhs->shift_left = static_cast<u32>(std::countr_zero(op2.U32()));
}
return lhs;
break;
}
case IR::Opcode::GetCbufU32x2:
case IR::Opcode::GetCbufU32:
break;
@@ -222,8 +289,10 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
return ConstBufferAddr{
.index = index.U32(),
.offset = offset.U32(),
.shift_left = 0,
.secondary_index = 0,
.secondary_offset = 0,
.secondary_shift_left = 0,
.dynamic_offset = {},
.count = 1,
.has_secondary = false,
@@ -247,8 +316,10 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
return ConstBufferAddr{
.index = index.U32(),
.offset = base_offset,
.shift_left = 0,
.secondary_index = 0,
.secondary_offset = 0,
.secondary_shift_left = 0,
.dynamic_offset = dynamic_offset,
.count = 8,
.has_secondary = false,
@@ -258,7 +329,7 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
ConstBufferAddr addr;
if (IsBindless(inst)) {
const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0))};
const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0), env)};
if (!track_addr) {
throw NotImplementedException("Failed to track bindless texture constant buffer");
}
@@ -267,8 +338,10 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
addr = ConstBufferAddr{
.index = env.TextureBoundBuffer(),
.offset = inst.Arg(0).U32(),
.shift_left = 0,
.secondary_index = 0,
.secondary_offset = 0,
.secondary_shift_left = 0,
.dynamic_offset = {},
.count = 1,
.has_secondary = false,
@@ -284,8 +357,9 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)};
const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)};
const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset) << cbuf.shift_left};
const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)
<< cbuf.secondary_shift_left};
return env.ReadTextureType(lhs_raw | rhs_raw);
}
@@ -487,8 +561,10 @@ void TexturePass(Environment& env, IR::Program& program) {
.has_secondary = cbuf.has_secondary,
.cbuf_index = cbuf.index,
.cbuf_offset = cbuf.offset,
.shift_left = cbuf.shift_left,
.secondary_cbuf_index = cbuf.secondary_index,
.secondary_cbuf_offset = cbuf.secondary_offset,
.secondary_shift_left = cbuf.secondary_shift_left,
.count = cbuf.count,
.size_shift = DESCRIPTOR_SIZE_SHIFT,
});
@@ -499,8 +575,10 @@ void TexturePass(Environment& env, IR::Program& program) {
.has_secondary = cbuf.has_secondary,
.cbuf_index = cbuf.index,
.cbuf_offset = cbuf.offset,
.shift_left = cbuf.shift_left,
.secondary_cbuf_index = cbuf.secondary_index,
.secondary_cbuf_offset = cbuf.secondary_offset,
.secondary_shift_left = cbuf.secondary_shift_left,
.count = cbuf.count,
.size_shift = DESCRIPTOR_SIZE_SHIFT,
});

View File

@@ -61,8 +61,10 @@ struct TextureBufferDescriptor {
bool has_secondary;
u32 cbuf_index;
u32 cbuf_offset;
u32 shift_left;
u32 secondary_cbuf_index;
u32 secondary_cbuf_offset;
u32 secondary_shift_left;
u32 count;
u32 size_shift;
};
@@ -85,8 +87,10 @@ struct TextureDescriptor {
bool has_secondary;
u32 cbuf_index;
u32 cbuf_offset;
u32 shift_left;
u32 secondary_cbuf_index;
u32 secondary_cbuf_offset;
u32 secondary_shift_left;
u32 count;
u32 size_shift;
};

View File

@@ -4,7 +4,7 @@
add_subdirectory(host_shaders)
if(LIBVA_FOUND)
set_source_files_properties(command_classes/codecs/codec.cpp
set_source_files_properties(host1x/codecs/codec.cpp
PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})
endif()
@@ -15,26 +15,14 @@ add_library(video_core STATIC
buffer_cache/buffer_cache.h
cdma_pusher.cpp
cdma_pusher.h
command_classes/codecs/codec.cpp
command_classes/codecs/codec.h
command_classes/codecs/h264.cpp
command_classes/codecs/h264.h
command_classes/codecs/vp8.cpp
command_classes/codecs/vp8.h
command_classes/codecs/vp9.cpp
command_classes/codecs/vp9.h
command_classes/codecs/vp9_types.h
command_classes/host1x.cpp
command_classes/host1x.h
command_classes/nvdec.cpp
command_classes/nvdec.h
command_classes/nvdec_common.h
command_classes/sync_manager.cpp
command_classes/sync_manager.h
command_classes/vic.cpp
command_classes/vic.h
compatible_formats.cpp
compatible_formats.h
control/channel_state.cpp
control/channel_state.h
control/channel_state_cache.cpp
control/channel_state_cache.h
control/scheduler.cpp
control/scheduler.h
delayed_destruction_ring.h
dirty_flags.cpp
dirty_flags.h
@@ -54,7 +42,31 @@ add_library(video_core STATIC
engines/maxwell_3d.h
engines/maxwell_dma.cpp
engines/maxwell_dma.h
engines/puller.cpp
engines/puller.h
framebuffer_config.h
host1x/codecs/codec.cpp
host1x/codecs/codec.h
host1x/codecs/h264.cpp
host1x/codecs/h264.h
host1x/codecs/vp8.cpp
host1x/codecs/vp8.h
host1x/codecs/vp9.cpp
host1x/codecs/vp9.h
host1x/codecs/vp9_types.h
host1x/control.cpp
host1x/control.h
host1x/host1x.cpp
host1x/host1x.h
host1x/nvdec.cpp
host1x/nvdec.h
host1x/nvdec_common.h
host1x/sync_manager.cpp
host1x/sync_manager.h
host1x/syncpoint_manager.cpp
host1x/syncpoint_manager.h
host1x/vic.cpp
host1x/vic.h
macro/macro.cpp
macro/macro.h
macro/macro_hle.cpp
@@ -195,6 +207,7 @@ add_library(video_core STATIC
texture_cache/render_targets.h
texture_cache/samples_helper.h
texture_cache/slot_vector.h
texture_cache/texture_cache.cpp
texture_cache/texture_cache.h
texture_cache/texture_cache_base.h
texture_cache/types.h

View File

@@ -5,7 +5,6 @@
#include <algorithm>
#include <array>
#include <deque>
#include <memory>
#include <mutex>
#include <numeric>
@@ -23,6 +22,7 @@
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_base.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
@@ -56,7 +56,7 @@ using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFE
using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
template <typename P>
class BufferCache {
class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
// Page size for caching purposes.
// This is unrelated to the CPU page size and it can be changed as it seems optimal.
@@ -116,10 +116,7 @@ public:
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
Runtime& runtime_);
Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
void TickFrame();
@@ -129,7 +126,7 @@ public:
void DownloadMemory(VAddr cpu_addr, u64 size);
bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer);
bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
@@ -353,7 +350,7 @@ private:
void NotifyBufferDeletion();
[[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
[[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, bool is_written = false) const;
[[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
PixelFormat format);
@@ -367,9 +364,6 @@ private:
void ClearDownload(IntervalType subtract_interval);
VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
Core::Memory::Memory& cpu_memory;
SlotVector<Buffer> slot_buffers;
@@ -444,12 +438,8 @@ private:
template <class P>
BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
Runtime& runtime_)
: runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
: runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} {
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
common_ranges.clear();
@@ -552,8 +542,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
template <class P>
bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
if (!cpu_src_address || !cpu_dest_address) {
return false;
}
@@ -611,7 +601,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
template <class P>
bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
if (!cpu_dst_address) {
return false;
}
@@ -635,7 +625,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
template <class P>
void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
u32 size) {
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
const Binding binding{
.cpu_addr = *cpu_addr,
.size = size,
@@ -673,7 +663,7 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
if (is_indexed) {
BindHostIndexBuffer();
} else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
const auto& regs = maxwell3d.regs;
const auto& regs = maxwell3d->regs;
if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
}
@@ -733,9 +723,9 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
enabled_storage_buffers[stage] |= 1U << ssbo_index;
written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
const auto& cbufs = maxwell3d.state.shader_stages[stage];
const auto& cbufs = maxwell3d->state.shader_stages[stage];
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
}
template <class P>
@@ -770,12 +760,12 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
enabled_compute_storage_buffers |= 1U << ssbo_index;
written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
const auto& launch_desc = kepler_compute.launch_description;
const auto& launch_desc = kepler_compute->launch_description;
ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
const auto& cbufs = launch_desc.const_buffer_config;
const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr);
compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
}
template <class P>
@@ -836,6 +826,19 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
const bool is_accuracy_normal =
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
auto it = committed_ranges.begin();
while (it != committed_ranges.end()) {
auto& current_intervals = *it;
auto next_it = std::next(it);
while (next_it != committed_ranges.end()) {
for (auto& interval : *next_it) {
current_intervals.subtract(interval);
}
next_it++;
}
it++;
}
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
@@ -991,19 +994,19 @@ void BufferCache<P>::BindHostIndexBuffer() {
const u32 size = index_buffer.size;
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
const u32 new_offset = offset + maxwell3d.regs.index_array.first *
maxwell3d.regs.index_array.FormatSizeInBytes();
const u32 new_offset = offset + maxwell3d->regs.index_array.first *
maxwell3d->regs.index_array.FormatSizeInBytes();
runtime.BindIndexBuffer(buffer, new_offset, size);
} else {
runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
buffer, offset, size);
runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_array.format,
maxwell3d->regs.index_array.first,
maxwell3d->regs.index_array.count, buffer, offset, size);
}
}
template <class P>
void BufferCache<P>::BindHostVertexBuffers() {
auto& flags = maxwell3d.dirty.flags;
auto& flags = maxwell3d->dirty.flags;
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
const Binding& binding = vertex_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -1014,7 +1017,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
}
flags[Dirty::VertexBuffer0 + index] = false;
const u32 stride = maxwell3d.regs.vertex_array[index].stride;
const u32 stride = maxwell3d->regs.vertex_array[index].stride;
const u32 offset = buffer.Offset(binding.cpu_addr);
runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
}
@@ -1154,7 +1157,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
template <class P>
void BufferCache<P>::BindHostTransformFeedbackBuffers() {
if (maxwell3d.regs.tfb_enabled == 0) {
if (maxwell3d->regs.tfb_enabled == 0) {
return;
}
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1239,16 +1242,19 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
template <class P>
void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
if (is_indexed) {
UpdateIndexBuffer();
}
UpdateVertexBuffers();
UpdateTransformFeedbackBuffers();
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
UpdateUniformBuffers(stage);
UpdateStorageBuffers(stage);
UpdateTextureBuffers(stage);
}
do {
has_deleted_buffers = false;
if (is_indexed) {
UpdateIndexBuffer();
}
UpdateVertexBuffers();
UpdateTransformFeedbackBuffers();
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
UpdateUniformBuffers(stage);
UpdateStorageBuffers(stage);
UpdateTextureBuffers(stage);
}
} while (has_deleted_buffers);
}
template <class P>
@@ -1262,8 +1268,8 @@ template <class P>
void BufferCache<P>::UpdateIndexBuffer() {
// We have to check for the dirty flags and index count
// The index count is currently changed without updating the dirty flags
const auto& index_array = maxwell3d.regs.index_array;
auto& flags = maxwell3d.dirty.flags;
const auto& index_array = maxwell3d->regs.index_array;
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
return;
}
@@ -1272,7 +1278,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
const GPUVAddr gpu_addr_begin = index_array.StartAddress();
const GPUVAddr gpu_addr_end = index_array.EndAddress();
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
const u32 size = std::min(address_size, draw_size);
@@ -1289,8 +1295,8 @@ void BufferCache<P>::UpdateIndexBuffer() {
template <class P>
void BufferCache<P>::UpdateVertexBuffers() {
auto& flags = maxwell3d.dirty.flags;
if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
auto& flags = maxwell3d->dirty.flags;
if (!maxwell3d->dirty.flags[Dirty::VertexBuffers]) {
return;
}
flags[Dirty::VertexBuffers] = false;
@@ -1302,33 +1308,25 @@ void BufferCache<P>::UpdateVertexBuffers() {
template <class P>
void BufferCache<P>::UpdateVertexBuffer(u32 index) {
if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
if (!maxwell3d->dirty.flags[Dirty::VertexBuffer0 + index]) {
return;
}
const auto& array = maxwell3d.regs.vertex_array[index];
const auto& limit = maxwell3d.regs.vertex_array_limit[index];
const auto& array = maxwell3d->regs.vertex_array[index];
const auto& limit = maxwell3d->regs.vertex_array_limit[index];
const GPUVAddr gpu_addr_begin = array.StartAddress();
const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
if (address_size >= 64_MiB) {
// Reported vertex buffer size is very large, cap to mapped buffer size
GPUVAddr submapped_addr_end = gpu_addr_begin;
const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
if (ranges.size() > 0) {
const auto& [addr, size] = *ranges.begin();
submapped_addr_end = addr + size;
}
address_size =
std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
}
const u32 size = address_size; // TODO: Analyze stride and number of vertices
if (array.enable == 0 || size == 0 || !cpu_addr) {
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
u32 address_size = static_cast<u32>(
std::min(gpu_addr_end - gpu_addr_begin, static_cast<u64>(std::numeric_limits<u32>::max())));
if (array.enable == 0 || address_size == 0 || !cpu_addr) {
vertex_buffers[index] = NULL_BINDING;
return;
}
if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
address_size =
static_cast<u32>(gpu_memory->MaxContinousRange(gpu_addr_begin, address_size));
}
const u32 size = address_size; // TODO: Analyze stride and number of vertices
vertex_buffers[index] = Binding{
.cpu_addr = *cpu_addr,
.size = size,
@@ -1382,7 +1380,7 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffers() {
if (maxwell3d.regs.tfb_enabled == 0) {
if (maxwell3d->regs.tfb_enabled == 0) {
return;
}
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1392,10 +1390,10 @@ void BufferCache<P>::UpdateTransformFeedbackBuffers() {
template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
const auto& binding = maxwell3d.regs.tfb_bindings[index];
const auto& binding = maxwell3d->regs.tfb_bindings[index];
const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
const u32 size = binding.buffer_size;
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
transform_feedback_buffers[index] = NULL_BINDING;
return;
@@ -1414,10 +1412,10 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
Binding& binding = compute_uniform_buffers[index];
binding = NULL_BINDING;
const auto& launch_desc = kepler_compute.launch_description;
const auto& launch_desc = kepler_compute->launch_description;
if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
const auto& cbuf = launch_desc.const_buffer_config[index];
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
if (cpu_addr) {
binding.cpu_addr = *cpu_addr;
binding.size = cbuf.size;
@@ -1567,6 +1565,8 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
auto& new_buffer = slot_buffers[new_buffer_id];
runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0);
for (const BufferId overlap_id : overlap.ids) {
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
}
@@ -1695,7 +1695,7 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
template <class P>
bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
std::span<u8> inlined_buffer) {
std::span<const u8> inlined_buffer) {
const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
if (!is_dirty) {
return false;
@@ -1831,7 +1831,7 @@ void BufferCache<P>::NotifyBufferDeletion() {
dirty_uniform_buffers.fill(~u32{0});
uniform_buffer_binding_sizes.fill({});
}
auto& flags = maxwell3d.dirty.flags;
auto& flags = maxwell3d->dirty.flags;
flags[Dirty::IndexBuffer] = true;
flags[Dirty::VertexBuffers] = true;
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
@@ -1841,16 +1841,18 @@ void BufferCache<P>::NotifyBufferDeletion() {
}
template <class P>
typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr,
bool is_written) const {
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr || size == 0) {
return NULL_BINDING;
}
const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
const Binding binding{
.cpu_addr = *cpu_addr,
.size = size,
.size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
.buffer_id = BufferId{},
};
return binding;
@@ -1859,7 +1861,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
template <class P>
typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
GPUVAddr gpu_addr, u32 size, PixelFormat format) {
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
TextureBufferBinding binding;
if (!cpu_addr || size == 0) {
binding.cpu_addr = 0;

View File

@@ -2,20 +2,22 @@
// SPDX-License-Identifier: MIT
#include <bit>
#include "command_classes/host1x.h"
#include "command_classes/nvdec.h"
#include "command_classes/vic.h"
#include "video_core/cdma_pusher.h"
#include "video_core/command_classes/sync_manager.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/host1x/control.h"
#include "video_core/host1x/host1x.h"
#include "video_core/host1x/nvdec.h"
#include "video_core/host1x/nvdec_common.h"
#include "video_core/host1x/sync_manager.h"
#include "video_core/host1x/vic.h"
#include "video_core/memory_manager.h"
namespace Tegra {
CDmaPusher::CDmaPusher(GPU& gpu_)
: gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
host1x_processor(std::make_unique<Host1x>(gpu)),
sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {}
CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_)
: host1x{host1x_}, nvdec_processor(std::make_shared<Host1x::Nvdec>(host1x)),
vic_processor(std::make_unique<Host1x::Vic>(host1x, nvdec_processor)),
host1x_processor(std::make_unique<Host1x::Control>(host1x)),
sync_manager(std::make_unique<Host1x::SyncptIncrManager>(host1x)) {}
CDmaPusher::~CDmaPusher() = default;
@@ -109,16 +111,17 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
case ThiMethod::SetMethod1:
LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
static_cast<u32>(vic_thi_state.method_0), data);
vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), data);
vic_processor->ProcessMethod(static_cast<Host1x::Vic::Method>(vic_thi_state.method_0),
data);
break;
default:
break;
}
break;
case ChClassId::Host1x:
case ChClassId::Control:
// This device is mainly for syncpoint synchronization
LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
host1x_processor->ProcessMethod(static_cast<Host1x::Method>(offset), data);
host1x_processor->ProcessMethod(static_cast<Host1x::Control::Method>(offset), data);
break;
default:
UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));

View File

@@ -12,11 +12,13 @@
namespace Tegra {
class GPU;
namespace Host1x {
class Control;
class Host1x;
class Nvdec;
class SyncptIncrManager;
class Vic;
} // namespace Host1x
enum class ChSubmissionMode : u32 {
SetClass = 0,
@@ -30,7 +32,7 @@ enum class ChSubmissionMode : u32 {
enum class ChClassId : u32 {
NoClass = 0x0,
Host1x = 0x1,
Control = 0x1,
VideoEncodeMpeg = 0x20,
VideoEncodeNvEnc = 0x21,
VideoStreamingVi = 0x30,
@@ -88,7 +90,7 @@ enum class ThiMethod : u32 {
class CDmaPusher {
public:
explicit CDmaPusher(GPU& gpu_);
explicit CDmaPusher(Host1x::Host1x& host1x);
~CDmaPusher();
/// Process the command entry
@@ -101,11 +103,11 @@ private:
/// Write arguments value to the ThiRegisters member at the specified offset
void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument);
GPU& gpu;
std::shared_ptr<Tegra::Nvdec> nvdec_processor;
std::unique_ptr<Tegra::Vic> vic_processor;
std::unique_ptr<Tegra::Host1x> host1x_processor;
std::unique_ptr<SyncptIncrManager> sync_manager;
Host1x::Host1x& host1x;
std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
std::unique_ptr<Tegra::Host1x::Vic> vic_processor;
std::unique_ptr<Tegra::Host1x::Control> host1x_processor;
std::unique_ptr<Host1x::SyncptIncrManager> sync_manager;
ChClassId current_class{};
ThiRegisters vic_thi_state{};
ThiRegisters nvdec_thi_state{};

View File

@@ -1,29 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "video_core/command_classes/host1x.h"
#include "video_core/gpu.h"
/// Keeps a reference to the GPU; Execute() later calls WaitFence on it.
Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}
/// Nothing to release explicitly; the destructor is defaulted.
Tegra::Host1x::~Host1x() = default;
/// Handles a single Host1x class method.
/// @param method   Method selected by the command stream.
/// @param argument Raw argument word that accompanies the method.
void Tegra::Host1x::ProcessMethod(Method method, u32 argument) {
    // The payload is only latched here; it is consumed by a later wait method.
    if (method == Method::LoadSyncptPayload32) {
        syncpoint_value = argument;
        return;
    }
    // Both wait variants are handled identically.
    if (method == Method::WaitSyncpt || method == Method::WaitSyncpt32) {
        Execute(argument);
        return;
    }
    // Everything else is not implemented.
    UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method));
}
/// Forwards to GPU::WaitFence with `data` and the syncpoint payload most recently stored by
/// ProcessMethod (LoadSyncptPayload32).
void Tegra::Host1x::Execute(u32 data) {
gpu.WaitFence(data, syncpoint_value);
}

View File

@@ -0,0 +1,40 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#include "common/assert.h"
#include "video_core/control/channel_state.h"
#include "video_core/dma_pusher.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/engines/puller.h"
#include "video_core/memory_manager.h"
namespace Tegra::Control {
/// Records the bind id of the channel. The channel starts out uninitialized: no engine or DMA
/// pusher is created here, that happens in Init().
ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {}
/// Creates the DMA pusher and all engines of this channel and marks it as initialized.
/// @param system Emulated system handed to the pusher and to the engines that take one.
/// @param gpu    GPU handed to the DMA pusher.
/// The memory manager has to be assigned before calling this; it is asserted below.
void ChannelState::Init(Core::System& system, GPU& gpu) {
    ASSERT(memory_manager);
    // Every component below that takes a memory manager shares this one.
    auto& channel_memory = *memory_manager;

    dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, channel_memory, *this);
    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, channel_memory);
    fermi_2d = std::make_unique<Engines::Fermi2D>();
    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, channel_memory);
    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, channel_memory);
    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, channel_memory);

    initialized = true;
}
/// Hands the rasterizer to the DMA pusher, the memory manager and every engine of the channel.
/// @param rasterizer Rasterizer to bind; forwarded unchanged to each component.
/// Must be called after Init(): the pusher and the engines are only created there.
void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
    // Every pointer dereferenced below is null until Init() has run. Fail with a clear
    // assertion instead of an unexplained null dereference.
    ASSERT(initialized);
    dma_pusher->BindRasterizer(rasterizer);
    memory_manager->BindRasterizer(rasterizer);
    maxwell_3d->BindRasterizer(rasterizer);
    fermi_2d->BindRasterizer(rasterizer);
    kepler_memory->BindRasterizer(rasterizer);
    kepler_compute->BindRasterizer(rasterizer);
    maxwell_dma->BindRasterizer(rasterizer);
}
} // namespace Tegra::Control

View File

@@ -0,0 +1,68 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <memory>
#include "common/common_types.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra {
class GPU;
namespace Engines {
class Puller;
class Fermi2D;
class Maxwell3D;
class MaxwellDMA;
class KeplerCompute;
class KeplerMemory;
} // namespace Engines
class MemoryManager;
class DmaPusher;
namespace Control {
/// State owned by one GPU channel: its engines, its DMA pusher and the memory manager they
/// operate on. The type is move-only (copying is deleted below).
struct ChannelState {
/// Stores `bind_id`; engines are not created until Init() is called.
explicit ChannelState(s32 bind_id);
ChannelState(const ChannelState& state) = delete;
ChannelState& operator=(const ChannelState&) = delete;
ChannelState(ChannelState&& other) noexcept = default;
ChannelState& operator=(ChannelState&& other) noexcept = default;
/// Creates the DMA pusher and the engines; asserts that memory_manager is set.
void Init(Core::System& system, GPU& gpu);
/// Forwards the rasterizer to the pusher, the memory manager and all engines.
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
/// Id this channel is bound under; -1 until a real id is supplied.
s32 bind_id = -1;
/// 3D engine
std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
/// 2D engine
std::unique_ptr<Engines::Fermi2D> fermi_2d;
/// Compute engine
std::unique_ptr<Engines::KeplerCompute> kepler_compute;
/// DMA engine
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
/// Memory manager used by the engines; held by shared_ptr and not created by Init().
std::shared_ptr<MemoryManager> memory_manager;
/// Command pusher of this channel, created by Init().
std::unique_ptr<DmaPusher> dma_pusher;
/// Set to true at the end of Init().
bool initialized{};
};
} // namespace Control
} // namespace Tegra

View File

@@ -0,0 +1,14 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#include "video_core/control/channel_state_cache.inc"
namespace VideoCommon {
/// Binds the references to the 3D engine, compute engine and memory manager of
/// `channel_state`. All three pointers are dereferenced here, so the channel must already own
/// those objects when this constructor runs.
ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
: maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute},
gpu_memory{*channel_state.memory_manager} {}
template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;
} // namespace VideoCommon

View File

@@ -0,0 +1,101 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <deque>
#include <limits>
#include <mutex>
#include <optional>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
namespace Tegra {
namespace Engines {
class Maxwell3D;
class KeplerCompute;
} // namespace Engines
class MemoryManager;
namespace Control {
struct ChannelState;
}
} // namespace Tegra
namespace VideoCommon {
/// References to the per-channel objects a cache needs while a channel is bound: the 3D
/// engine, the compute engine and the GPU memory manager. The object does not own any of them.
class ChannelInfo {
public:
    ChannelInfo() = delete;
    /// Binds the references to the engines and memory manager of `state`.
    explicit ChannelInfo(Tegra::Control::ChannelState& state);
    ChannelInfo(const ChannelInfo& state) = delete;
    ChannelInfo& operator=(const ChannelInfo&) = delete;
    ChannelInfo(ChannelInfo&& other) = default;
    // Reference members cannot be reseated, so a defaulted move assignment is implicitly
    // deleted. Spell that out instead of advertising an operator that does not exist.
    ChannelInfo& operator=(ChannelInfo&& other) = delete;

    Tegra::Engines::Maxwell3D& maxwell3d;
    Tegra::Engines::KeplerCompute& kepler_compute;
    Tegra::MemoryManager& gpu_memory;
};
/// Base class for caches that keep one state object of type P per GPU channel and track which
/// channel and address space are currently bound.
template <class P>
class ChannelSetupCaches {
public:
    /// Operations for setting the channel of execution.
    virtual ~ChannelSetupCaches();

    /// Create channel state.
    virtual void CreateChannel(Tegra::Control::ChannelState& channel);

    /// Bind a channel for execution.
    void BindToChannel(s32 id);

    /// Erase channel's state.
    void EraseChannel(s32 id);

    /// Returns the memory manager registered for address space `id`, or nullptr when no such
    /// address space has been registered.
    Tegra::MemoryManager* GetFromID(size_t id) const {
        std::unique_lock<std::mutex> lk(config_mutex);
        const auto ref = address_spaces.find(id);
        // Dereferencing end() is undefined behavior; report an unknown id instead.
        if (ref == address_spaces.end()) {
            return nullptr;
        }
        return ref->second.gpu_memory;
    }

    /// Returns the storage id assigned to address space `id`, or std::nullopt when it is
    /// not registered.
    std::optional<size_t> getStorageID(size_t id) const {
        std::unique_lock<std::mutex> lk(config_mutex);
        const auto ref = address_spaces.find(id);
        if (ref == address_spaces.end()) {
            return std::nullopt;
        }
        return ref->second.storage_id;
    }

protected:
    /// Value of current_channel_id while no channel is bound.
    static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};

    // The pointers below are only meaningful while a channel is bound. They start out null so
    // that use before the first BindToChannel() is detectable rather than indeterminate.
    P* channel_state{};
    size_t current_channel_id{UNSET_CHANNEL};
    size_t current_address_space{};
    Tegra::Engines::Maxwell3D* maxwell3d{};
    Tegra::Engines::KeplerCompute* kepler_compute{};
    Tegra::MemoryManager* gpu_memory{};

    /// Per-channel state objects, indexed by the ids stored in channel_map.
    std::deque<P> channel_storage;
    /// Indices into channel_storage that belonged to erased channels and can be reused.
    std::deque<size_t> free_channel_ids;
    /// Maps a channel bind id to its index in channel_storage.
    std::unordered_map<s32, size_t> channel_map;
    std::vector<size_t> active_channel_ids;

    struct AddresSpaceRef {
        size_t ref_count;
        size_t storage_id;
        Tegra::MemoryManager* gpu_memory;
    };
    /// Registered address spaces, keyed by the id of their memory manager.
    std::unordered_map<size_t, AddresSpaceRef> address_spaces;
    /// Guards the channel and address space bookkeeping; mutable so const getters can lock.
    mutable std::mutex config_mutex;

    /// Hook invoked when an address space is registered for the first time.
    virtual void OnGPUASRegister([[maybe_unused]] size_t map_id) {}
};
} // namespace VideoCommon

View File

@@ -0,0 +1,86 @@
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#include <algorithm>
#include "video_core/control/channel_state.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
namespace VideoCommon {
/// Out-of-line defaulted virtual destructor of the channel cache base.
template <class P>
ChannelSetupCaches<P>::~ChannelSetupCaches() = default;
/// Creates the cache-side state for `channel` and registers its address space.
/// The channel's bind id must be non-negative and not yet known to this cache.
/// A storage slot freed by EraseChannel() is reused when available; otherwise a new slot is
/// appended. The first channel seen for an address space triggers OnGPUASRegister().
template <class P>
void ChannelSetupCaches<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
    std::unique_lock<std::mutex> lk(config_mutex);
    ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
    auto new_id = [this, &channel]() {
        if (!free_channel_ids.empty()) {
            auto id = free_channel_ids.front();
            free_channel_ids.pop_front();
            // The slot still holds the state of the erased channel. End its lifetime before
            // constructing the new state in place, otherwise anything the old object owns is
            // leaked. Assignment is not an option because P holds reference members.
            P* const slot = &channel_storage[id];
            slot->~P();
            new (slot) P(channel);
            return id;
        }
        channel_storage.emplace_back(channel);
        return channel_storage.size() - 1;
    }();
    channel_map.emplace(channel.bind_id, new_id);
    if (current_channel_id != UNSET_CHANNEL) {
        channel_state = &channel_storage[current_channel_id];
    }
    active_channel_ids.push_back(new_id);

    // The same address space id is needed for lookup, insertion and notification.
    const auto address_space_id = channel.memory_manager->GetID();
    auto as_it = address_spaces.find(address_space_id);
    if (as_it != address_spaces.end()) {
        // Address space already registered by another channel: only count the new user.
        as_it->second.ref_count++;
        return;
    }
    AddresSpaceRef new_gpu_mem_ref{
        .ref_count = 1,
        .storage_id = address_spaces.size(),
        .gpu_memory = channel.memory_manager.get(),
    };
    address_spaces.emplace(address_space_id, new_gpu_mem_ref);
    OnGPUASRegister(address_space_id);
}
/// Makes the channel with bind id `id` the current one and refreshes the cached engine and
/// memory manager pointers from its state. The channel must have been created beforehand.
template <class P>
void ChannelSetupCaches<P>::BindToChannel(s32 id) {
    std::unique_lock<std::mutex> lk(config_mutex);
    const auto entry = channel_map.find(id);
    ASSERT(entry != channel_map.end() && id >= 0);

    current_channel_id = entry->second;
    P& bound = channel_storage[current_channel_id];
    channel_state = &bound;
    maxwell3d = &bound.maxwell3d;
    kepler_compute = &bound.kepler_compute;
    gpu_memory = &bound.gpu_memory;
    // The address space is identified by the id of the bound memory manager.
    current_address_space = gpu_memory->GetID();
}
/// Forgets the channel with bind id `id`: its storage slot is queued for reuse, its mapping is
/// removed and it is dropped from the active list. If it was the bound channel, the cache is
/// left with no channel bound.
template <class P>
void ChannelSetupCaches<P>::EraseChannel(s32 id) {
    std::unique_lock<std::mutex> lk(config_mutex);
    const auto entry = channel_map.find(id);
    ASSERT(entry != channel_map.end() && id >= 0);

    const auto erased_slot = entry->second;
    free_channel_ids.push_back(erased_slot);
    channel_map.erase(entry);

    const bool erased_is_bound = erased_slot == current_channel_id;
    if (erased_is_bound) {
        // Nothing is bound any more; clear every cached pointer.
        current_channel_id = UNSET_CHANNEL;
        channel_state = nullptr;
        maxwell3d = nullptr;
        kepler_compute = nullptr;
        gpu_memory = nullptr;
    } else if (current_channel_id != UNSET_CHANNEL) {
        channel_state = &channel_storage[current_channel_id];
    }

    const auto active_pos =
        std::find(active_channel_ids.begin(), active_channel_ids.end(), erased_slot);
    active_channel_ids.erase(active_pos);
}
} // namespace VideoCommon

View File

@@ -0,0 +1,32 @@
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#include <memory>
#include "common/assert.h"
#include "video_core/control/channel_state.h"
#include "video_core/control/scheduler.h"
#include "video_core/gpu.h"
namespace Tegra::Control {
/// Keeps a reference to the GPU; Push() uses it to bind the target channel.
Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}
/// Defaulted out of line.
Scheduler::~Scheduler() = default;
/// Submits a command list to a declared channel and dispatches it right away.
/// @param channel Bind id of a channel previously passed to DeclareChannel().
/// @param entries Command list; moved into the channel's DMA pusher.
/// The scheduling lock is held for the whole call, including the dispatch.
void Scheduler::Push(s32 channel, CommandList&& entries) {
    std::unique_lock lk(scheduling_guard);
    const auto found = channels.find(channel);
    ASSERT(found != channels.end());

    // Hold our own reference to the channel state for the duration of the submission.
    const std::shared_ptr<ChannelState> target = found->second;
    gpu.BindChannel(target->bind_id);

    auto& pusher = *target->dma_pusher;
    pusher.Push(std::move(entries));
    pusher.DispatchCalls();
}
void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
s32 channel = new_channel->bind_id;
std::unique_lock lk(scheduling_guard);
channels.emplace(channel, new_channel);
}
} // namespace Tegra::Control

View File

@@ -0,0 +1,37 @@
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <memory>
#include <mutex>
#include <unordered_map>
#include "video_core/dma_pusher.h"
namespace Tegra {
class GPU;
namespace Control {
struct ChannelState;
/// Routes command lists to declared channels, keyed by their bind id.
class Scheduler {
public:
explicit Scheduler(GPU& gpu_);
~Scheduler();
/// Binds `channel` on the GPU, pushes `entries` to its DMA pusher and dispatches them.
void Push(s32 channel, CommandList&& entries);
/// Registers `new_channel` under its bind id.
void DeclareChannel(std::shared_ptr<ChannelState> new_channel);
private:
/// Declared channels by bind id.
std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
/// Locked by both Push() and DeclareChannel().
std::mutex scheduling_guard;
GPU& gpu;
};
} // namespace Control
} // namespace Tegra

Some files were not shown because too many files have changed in this diff Show More