Compare commits

..

204 Commits

Author SHA1 Message Date
Feng Chen
5987044918 Detect depth mode by viewport 2021-09-14 22:46:59 +08:00
bunnei
081ccc6441 Merge pull request #7009 from ameerj/main_process_cleanup
core: Destroy main_process during shutdown
2021-09-13 23:32:43 -07:00
ameerj
67f7a6c398 core: Destroy main_process during shutdown
The main_process was never being cleaned up, causing a noticeable memory leak after subsequent launches. This change cleans up the memory during Core Shutdown, mitigating the leak.
2021-09-13 23:44:52 -04:00
Morph
d86a9b9a4b Merge pull request #6943 from FernandoS27/omae-wa-mou-shindeiru
Vulkan: Disable VK_EXT_SAMPLER_FILTER_MINMAX in GCN AMD
2021-09-13 17:33:15 -04:00
Fernando Sahmkow
7a712da2b3 Vulkan: Disable VK_EXT_SAMPLER_FILTER_MINMAX in GCN AMD since it's broken. 2021-09-13 23:29:57 +02:00
Morph
62e88d0e74 Merge pull request #7006 from FernandoS27/a-motherfucking-driver
Vulkan: Blacklist Int8Float16 Extension on AMD on driver 21.9.1
2021-09-13 17:25:56 -04:00
Mai M
edf3da346f Merge pull request #7005 from Morph1984/enum-bitwise-shift-ops
common_funcs: Add enum flag bitwise shift operator overloads
2021-09-13 17:21:22 -04:00
Morph
fde9b84b21 Merge pull request #6944 from FernandoS27/dear-drunk-me
Vulkan/Descriptors: Increase sets per pool on AMD propietary driver.
2021-09-13 17:20:07 -04:00
Fernando Sahmkow
e7c8a0bb23 Vulkan: Blacklist Int8Float16 Extension on AMD on driver 21.9.1 2021-09-13 23:17:37 +02:00
Fernando S
1bb28dfe2c Merge pull request #7001 from ameerj/wario-fix
vk_rasterizer: Fix dynamic StencilOp updating when two faces are enabled
2021-09-13 23:16:59 +02:00
Fernando Sahmkow
e7ca37b1e5 Vulkan/Descriptors: Increase sets per pool on AMFD propietary driver. 2021-09-13 23:09:18 +02:00
Morph
3512cae623 common_funcs: Add enum flag bitwise shift operator overloads
This adds bitwise shift operator overloads (<<, >>, <<=, >>=) in the macro DECLARE_ENUM_FLAG_OPERATORS(type)
2021-09-13 16:01:20 -04:00
Ameer J
d180fd7c36 Merge pull request #7000 from Morph1984/create-dir-comment
FS: Mark recursive CreateDirectory as inaccurate and temporary
2021-09-12 21:06:52 -04:00
Mai M
e4318d2207 Merge pull request #7002 from ameerj/vk-state-unused
vk_state_tracker: Remove unused function
2021-09-12 17:31:56 -04:00
ameerj
678f73069f vk_rasterizer: Fix dynamic StencilOp updating when two faces are enabled
This function was incorrectly using the stencil_two_side_enable register when dynamically updating the StencilOp.
2021-09-12 16:19:12 -04:00
ameerj
8e289ade15 vk_state_tracker: Remove unused function 2021-09-12 15:28:24 -04:00
Morph
727f607e00 FS: Mark recursive CreateDirectory as inaccurate and temporary 2021-09-12 14:06:01 -04:00
Morph
9248442bb2 Merge pull request #6948 from ameerj/amd-warp-fix
shaders: Fix warp instructions on 64-thread warp devices
2021-09-12 13:53:29 -04:00
Morph
4ab549e62a Merge pull request #6975 from ogniK5377/acc-async-ctx
account: EnsureTokenIdCacheAsync
2021-09-12 12:03:10 -04:00
Morph
f0f416e85c Merge pull request #6974 from ogniK5377/fs-recursive-createdir
FS: Recursively create directories for CreateDirectory
2021-09-12 12:02:39 -04:00
Morph
9907302465 Merge pull request #6997 from ameerj/stop-emulation-confirmation
main: Apply confirm exit setting in exit locked scenarios
2021-09-12 12:01:57 -04:00
Morph
3428232bca Merge pull request #6992 from german77/brains
hid/am: Stub SetTouchScreenConfiguration and implement GetNotificationStorageChannelEvent
2021-09-12 12:01:43 -04:00
Morph
74030eb427 Merge pull request #6987 from Morph1984/common-error
common: Move error handling functions out of common_funcs
2021-09-12 12:01:23 -04:00
Morph
47b6f522bd Merge pull request #6986 from Morph1984/version-update
api_version: Update and add AtmosphereTargetFirmware
2021-09-12 12:01:11 -04:00
ameerj
188cf1aed2 main: Apply confirm exit setting in exit locked scenarios
Some titles set an exit lock through HLE, which prompts an exit confirmation when stopping emulation if the system is locked.
This change allows bypassing this confirmation if the setting to confirm exits has been disabled by the user.
2021-09-12 00:31:32 -04:00
Morph
e67463df24 shader_environment: Add missing <algorithm> include 2021-09-11 17:19:16 -04:00
Morph
63b4c8f9f7 vk_descriptor_pool: Add missing <algorithm> include 2021-09-11 17:19:16 -04:00
Morph
76abf55f25 slot_vector: Add missing <algorithm> include 2021-09-11 17:19:15 -04:00
Morph
554c46d186 video_core/memory_manager: Add missing <algorithm> include 2021-09-11 17:19:15 -04:00
Morph
6f307f1521 kernel: Add missing <functional> include 2021-09-11 17:19:15 -04:00
Morph
4a6a73e887 file_sys/kernel_executable: Add missing <string> include 2021-09-11 17:19:14 -04:00
Morph
ae028ddf22 codec: Add missing <string_view> include 2021-09-11 17:19:14 -04:00
Morph
eb1e3f19bb common_funcs: Replace <algorithm> with <iterator> 2021-09-11 17:19:14 -04:00
Morph
290afc00d3 common: Move error handling to error.cpp/h
This allows us to avoid implicitly including <string> every time common_funcs.h is included.
2021-09-11 17:19:14 -04:00
Fernando S
be4e192903 Merge pull request #6846 from ameerj/nvdec-gpu-decode
nvdec: Add GPU video decoding for all capable drivers and platforms
2021-09-11 23:11:32 +02:00
Fernando S
82c867164b Merge pull request #6901 from ameerj/vk-clear-bits
vk_rasterizer: Only clear depth/stencil buffers when specified in attachment aspect mask
2021-09-11 22:36:22 +02:00
Fernando S
ec6490f5ad Merge pull request #6941 from ameerj/swapchain-srgb
vk_swapchain: Prefer linear swapchain format when presenting sRGB images
2021-09-11 22:36:03 +02:00
Fernando S
472aad69db Merge pull request #6953 from ameerj/anv-semaphore
renderer_vulkan: Wait on present semaphore at queue submit
2021-09-11 22:35:52 +02:00
Fernando S
55854c807d Merge pull request #6981 from ameerj/nvflinger-hb-format
nvflinger: Use external surface format for framebuffer creation
2021-09-11 22:35:25 +02:00
german77
9bddcdac69 am: Implement GetNotificationStorageChannelEvent 2021-09-10 12:24:50 -05:00
german77
a7bbd37f81 hid: Stub SetTouchScreenConfiguration 2021-09-10 12:24:28 -05:00
Morph
c9710f6c78 api_version: Update and add AtmosphereTargetFirmware 2021-09-10 01:10:47 -04:00
bunnei
7e9163779d Merge pull request #6962 from vonchenplus/spirv_support_legacy_attribute
renderer_vulkan: Spirv support glsl  legacy attribute
2021-09-08 14:04:44 -07:00
Chloe
005b0e68db Addressed issues
Co-authored-by: Mai M. <mathew1800@gmail.com>
2021-09-09 03:00:08 +10:00
Chloe Marcec
543081e4a1 Mark is_complete as atomic 2021-09-09 00:10:52 +10:00
Chloe Marcec
89958e27aa Addressed issues 2021-09-09 00:09:04 +10:00
Fernando S
6b16f7807e Merge pull request #6980 from vonchenplus/fix_blend_equation_error
Fix blend equation enum error
2021-09-08 11:50:26 +02:00
Ameer J
eb1ba45c39 Merge pull request #6971 from bunnei/buffer-queue-kevent
core: hle: service: buffer_queue: Improve management of KEvent.
2021-09-08 00:34:36 -04:00
Feng Chen
b1e655f898 Detail adjustment 2021-09-08 10:30:00 +08:00
Feng Chen
bbc1800c1b Detail adjustment 2021-09-08 09:53:10 +08:00
Feng Chen
e5ca733722 Re-implement get unused location 2021-09-07 13:22:52 +08:00
Feng Chen
9cdf2383e9 Move attribute related definitions to spirv anonymous namespace 2021-09-07 12:34:35 +08:00
ameerj
9e2bf49677 nvflinger: Use external surface format for framebuffer creation
The format member the IGBPBuffer may not always specify the correct desired format. Using the external format member ensures a valid format is provided when creating the framebuffer.

Fixes homebrew using the wrong framebuffer format.
2021-09-06 23:14:31 -04:00
Ameer J
ab73787d8f Merge pull request #6977 from Moonlacer/master
Second part of Golden's PR #6976
2021-09-06 22:58:23 -04:00
Ameer J
743428e025 Merge pull request #6976 from goldenx86/patch-2
Rename all shader cache strings to pipeline cache
2021-09-06 22:58:03 -04:00
Feng Chen
0292374807 Fix blend equation enum error 2021-09-07 10:12:09 +08:00
Moonlacer
bdd153bc0d Second part of Golden's PR 2021-09-06 15:25:40 -05:00
Matías Locatti
296fa4e06e Rename all shader cache references to pipeline cache
After Hades, both OpenGL and Vulkan use a pipeline cache instead of single stages of the graphics pipeline. Renamed the Remove menu entries to match.
2021-09-06 15:53:04 -03:00
Chloe Marcec
9141816b10 address name shadowing with system 2021-09-06 22:13:51 +10:00
Chloe Marcec
4e2aa50cef account: EnsureTokenIdCacheAsync
Closes #2547, #6946
2021-09-06 21:16:21 +10:00
bunnei
51ccc29cdd Merge pull request #6965 from bunnei/cpu_manager_jthread
core: cpu_manager: Use jthread.
2021-09-06 03:49:14 -07:00
Chloe Marcec
0b891c9245 FS: Recursively create directories for CreateDirectory
Originally we only created the parent directory, this caused issues for creating directories which also contained subdirectories, eg `/Folder1/Folder2`

This allows the ultimate mod manager homebrew to at least boot
2021-09-06 19:35:55 +10:00
Feng Chen
1de9e4e121 Dynamic get unused location 2021-09-06 10:46:03 +08:00
Feng Chen
d994466a08 Implement intput and output fixed fnc textures 2021-09-06 10:36:45 +08:00
bunnei
e05bfd2f54 core: hle: service: buffer_queue: Improve management of KEvent. 2021-09-04 22:25:46 -07:00
bunnei
d9ce179ec2 Merge pull request #6968 from bunnei/nvflinger-event
core: hle: service: nvflinger/vi: Improve management of KEvent.
2021-09-04 22:25:20 -07:00
bunnei
fb3e9314b9 core: hle: service: nvflinger/vi: Improve management of KEvent. 2021-09-03 21:53:00 -07:00
bunnei
25a97e0139 core: cpu_manager: Use jthread. 2021-09-03 19:05:41 -07:00
Feng Chen
a7bbaa4897 Rename parameters 2021-09-03 23:52:20 +08:00
Feng Chen
cf26f375ff Fix create GraphicsPipelines crash 2021-09-03 22:55:53 +08:00
ameerj
7d854fbdb0 renderer_vulkan: Wait on present semaphore at queue submit
The present semaphore is being signalled by the call to acquire the
swapchain image. This semaphore is meant to be waited on when rendering
to the swapchain image. Currently it is waited on when presenting, but
moving its usage to be waited on in the command buffer submission allows
for proper usage of this semaphore.

Fixes the device lost when launching titles on the Intel Linux Mesa driver.
2021-09-02 13:13:20 -04:00
Feng Chen
1e2a89d306 Add input/output location 2021-09-02 23:34:51 +08:00
bunnei
b2572a56d3 Merge pull request #6900 from ameerj/attr-reorder
structured_control_flow: Add DemoteCombinationPass
2021-09-01 17:36:26 -07:00
Mai M
25444041d0 Merge pull request #6951 from german77/log
common/logging: Add missing include
2021-09-01 20:21:15 -04:00
german77
c57e0b3b24 common/logging: Add missing include 2021-09-01 19:13:33 -05:00
ameerj
d956fb3c7c emit_glsl_warp: Fix shuffle ops for 64-thread warp sizes 2021-08-31 16:11:25 -04:00
ameerj
5b45dfe971 emit_glsl_warp: Fix ballot related ops for 64-thread warp sizes 2021-08-31 16:11:25 -04:00
ameerj
a5d9dcf3d9 emit_spirv_warp: Fix shuffle ops for 64-thread warp sizes 2021-08-31 13:40:39 -04:00
ameerj
95213270ef emit_spirv_warp: Fix ballot related ops for 64-thread warp sizes 2021-08-31 13:40:12 -04:00
bunnei
956171f024 Merge pull request #6897 from FernandoS27/pineapple-does-not-belong-in-pizza
Project <tentative title>: Rework Garbage Collection.
2021-08-31 09:11:21 -07:00
Feng Chen
73b11f390e Add colorfront and txtcoord support 2021-09-01 00:07:25 +08:00
bunnei
122eb3cbd0 Merge pull request #6928 from ameerj/spirv-get-frontface
emit_spirv_context_get_set: Fix Get FrontFace return value
2021-08-30 18:16:31 -07:00
bunnei
ec19d9594f Merge pull request #6879 from ameerj/decoder-assert
vk_blit_screen: Fix non-accelerated texture size calculation
2021-08-30 15:24:04 -07:00
ameerj
907dfbea71 structured_control_flow: Skip reordering nested demote branches.
Nested demote branches add complexity with combining the condition if it has not been initialized yet. Skip them for the time being.
2021-08-30 11:46:25 -04:00
ameerj
4fda7f1c82 structured_control_flow: Conditionally invoke demote reorder pass
This is only needed on select drivers when a fragment shader discards/demotes.
2021-08-30 11:46:24 -04:00
Fernando Sahmkow
fe0acec539 Garbage Collection: Make it more agressive on high priority mode. 2021-08-29 18:57:17 +02:00
Fernando Sahmkow
ff48f06fb9 Garbage Collection: Adress Feedback. 2021-08-29 18:19:53 +02:00
bunnei
5f19b66189 Merge pull request #6905 from Morph1984/nifm-misc
nifm/network_interface: Cleanup and populate fields in GetCurrentNetworkProfile
2021-08-29 00:04:58 -07:00
ameerj
27f8f3333f vulkan_device: Enable VK_KHR_swapchain_mutable_format if available
Silences validation errors when creating sRGB image views of linear swapchain images
2021-08-29 02:03:36 -04:00
ameerj
3c65c8580f vk_swapchain: Prefer linear swapchain format when presenting sRGB images
Fixes broken sRGB when presenting from a secondary GPU.
2021-08-29 02:03:35 -04:00
bunnei
4e88989435 Merge pull request #6921 from ameerj/vp9-unused
vp9_types: Remove unusued VP9 info struct members
2021-08-28 22:20:09 -07:00
Morph
7d2265b6a8 Merge pull request #6927 from german77/ngct
ngct: Stub NGCT:U service
2021-08-28 23:03:29 -04:00
Fernando Sahmkow
ba82bb359b Garbage Collection: enable as default, eliminate option. 2021-08-28 17:55:37 +02:00
Fernando Sahmkow
d540d284b5 VideoCore: Rework Garbage Collection. 2021-08-28 17:54:12 +02:00
ameerj
862dc2b2b3 structured_control_flow: Add DemoteCombinationPass
Some drivers misread data when demotes are interleaved in the program. This moves demote branches to be checked at the end of the program.
Fixes "wireframe" issue in Pokemon SwSh on some drivers
2021-08-28 11:35:25 -04:00
german77
f134a5e56c ngct: Stub NGCT:U service 2021-08-27 14:15:34 -05:00
bunnei
c20ea89390 Merge pull request #6929 from yuzu-emu/revert-6870-trace-back-stack-back-stack-back
Revert "logging: Display backtrace on crash"
2021-08-27 02:23:01 -07:00
Morph
790a09bc93 Revert "logging: Display backtrace on crash" 2021-08-27 04:01:22 -04:00
Morph
c1e2063c0d service: nifm: Populate fields in GetCurrentNetworkProfile
Populates the current_address, subnet_mask, and gateway fields from the selected network interface.
2021-08-27 02:10:59 -04:00
Morph
878d0225c5 service: nifm: Cleanup GetCurrentIpConfigInfo 2021-08-27 02:10:58 -04:00
Morph
871e1c6315 network_interface: Cleanup code 2021-08-27 02:10:58 -04:00
Morph
a32a7dacf4 network_interface: Replace default return value with std::nullopt 2021-08-27 02:10:58 -04:00
ameerj
6e407c02d8 emit_spirv_context_get_set: Fix Get FrontFace return value
The IR expects GetAttribute to return an F32 value. This case was returning a U32 instead.
2021-08-26 21:37:34 -04:00
bunnei
d10d480642 Merge pull request #6870 from yzct12345/trace-back-stack-back-stack-back
logging: Display backtrace on crash
2021-08-26 17:41:14 -07:00
bunnei
98c2f0e576 Merge pull request #6922 from yuzu-emu/revert-6832-scheduler-improvements
Revert "kernel: Various improvements to scheduler"
2021-08-25 21:00:18 -07:00
bunnei
0c8594b225 Revert "kernel: Various improvements to scheduler" 2021-08-25 20:59:28 -07:00
ameerj
eb2624ed65 vp9_types: Minor refactor of VP9 info structs. 2021-08-25 21:42:43 -04:00
ameerj
3de38c9a70 vp9_types: Remove unused Vp9PictureInfo members 2021-08-25 21:29:22 -04:00
Fernando S
3843995ceb Merge pull request #6919 from ameerj/vk-int8-capability
vulkan_device: Add a check for int8 support
2021-08-25 23:46:08 +02:00
Ameer J
de71a4d70d Merge pull request #6894 from FernandoS27/bunneis-left-ear
GPU_MemoryManger: Fix GetSubmappedRange.
2021-08-25 16:50:03 -04:00
bunnei
4654fb96b0 Merge pull request #6917 from ameerj/log-init-fix
logging: Fix log filter during initialization
2021-08-24 18:31:27 -07:00
ameerj
4d535799eb vulkan_device: Add a check for int8 support
Silences validation errors when shaders use int8 without specifying its support to the API
2021-08-24 21:22:41 -04:00
ameerj
84b4ac5729 logging: Fix log filter during initialization
The log filter was being ignored on initialization due to the logging instance being initialized before the config instance, so the log filter was set to its default value.

This fixes that oversight, along with using descriptive exceptions instead of abort() calls.
2021-08-24 01:32:38 -04:00
Ameer J
bed0c3c92a Merge pull request #6878 from BreadFish64/optimize-GetHostThreadID
kernel: Optimize GetHostThreadID
2021-08-24 00:01:13 -04:00
bunnei
7eb4542c1d Merge pull request #6912 from lioncash/plural
CMakeLists: Ensure proper numerusform tags are generated for pluralized translations
2021-08-23 16:40:31 -07:00
bunnei
f65f8b9097 Merge pull request #6869 from yzct12345/shiny-logs-in-the-fireplace
logging: Simplify and make thread-safe
2021-08-22 20:40:18 -07:00
Lioncash
6fef91ce4c CMakeLists: Ensure proper numerusform tags are generated for pluralized translations
If we don't set an explicit source and target language for the base
english translation, then we'll generate an incorrect number of
<numerusform> tags (which Transifex doesn't like).
2021-08-22 11:52:37 -04:00
Ameer J
a428a843cb Merge pull request #6904 from Morph1984/lang-settings-range
settings: Amend language_index maximum setting range
2021-08-21 15:39:55 -04:00
Morph
c4a1d3cbf4 settings: Amend language_index maximum setting range
The maximum is now 17 with the addition of Brazilian Portuguese
2021-08-21 09:43:15 -04:00
ameerj
e0397f00d0 vk_rasterizer: Only clear depth and stencil buffers when set in attachment aspect mask
Silences validation errors for clearing the depth/stencil buffers of framebuffer attachments that were not specified to have depth/stencil usage.
2021-08-21 02:37:15 -04:00
Ameer J
bde6b899a1 Merge pull request #6888 from v1993/patch-3
video_core: eliminate constant ternary
2021-08-21 00:16:18 -04:00
bunnei
455e28790a Merge pull request #6877 from MerryMage/dyn-ignore-asserts
dynarmic: Update and enable DYNARMIC_IGNORE_ASSERTS
2021-08-19 15:53:14 -07:00
Mai M
519978ce70 Merge pull request #6887 from v1993/patch-2
SPIR-V: Merge two ifs in EmitGetAttribute
2021-08-19 17:16:37 -04:00
Mai M
0821d95399 Merge pull request #6886 from Morph1984/error-code-64
applet_error: Fix 64-bit error code conversion
2021-08-19 17:15:49 -04:00
Mai M
155b99ee55 Merge pull request #6890 from v1993/patch-5
Replace QPoint with QPointF where applicable
2021-08-19 17:14:48 -04:00
Fernando Sahmkow
ef2066b272 GPU_MemoryManger: Fix GetSubmappedRange. 2021-08-19 22:57:22 +02:00
Valeri
d82fc52cea Replace QPoint with QPointF where applicable
Previously, floats were implicitly cast to integers
2021-08-19 23:16:58 +03:00
Morph
151ab86204 Merge pull request #6889 from v1993/patch-4
qt_software_keyboard: fix copy-paste error
2021-08-19 15:32:07 -04:00
Valeri
40674b8e22 qt_software_keyboard: fix copy-paste error 2021-08-19 22:17:15 +03:00
Valeri
4fd655cb46 video_core: eliminate constant ternary
`via_header_index` is already checked above, so it would never be true in this branch
2021-08-19 21:22:05 +03:00
Morph
9cb376f8c2 applet_error: Fix 64-bit error code conversion 2021-08-19 13:16:48 -04:00
Valeri
beb7305b73 SPIR-V: Merge two ifs in EmitGetAttribute 2021-08-19 20:13:46 +03:00
Mai M
562d2aa3d6 Merge pull request #6885 from v1993/patch-1
Fix crash in logging in CreateStrayLayer
2021-08-19 12:34:34 -04:00
Valeri
ab02addde3 Fix crash in logging in CreateStrayLayer
It was trying to log value of layer_id which is specifically known not to exist, potentially leading to segfault. Log display_id instead.
2021-08-19 19:33:07 +03:00
Morph
aa74aaf38f Merge pull request #6884 from v1993/patch-1
Fix check is thread current in GetThreadContext
2021-08-19 10:39:35 -04:00
Valeri
0b3d12be40 Fix check is thread current in GetThreadContext
Misplaced break made it only check for the first core.
2021-08-19 16:46:30 +03:00
bunnei
aa40084c24 Merge pull request #6832 from bunnei/scheduler-improvements
kernel: Various improvements to scheduler
2021-08-18 15:42:46 -07:00
ameerj
b384129c63 h264: Lower max_num_ref_frames
GPU decoding seems to be more picky when it comes to the maximum number of reference frames.
2021-08-16 14:40:53 -04:00
ameerj
cd016d3cb5 configure_graphics: Add GPU nvdec decoding as an option
Some system configurations may see visual regressions or lower performance using GPU decoding compared to CPU decoding. This setting provides the option for users to specify their decoding preference.

Co-Authored-By: yzct12345 <87620833+yzct12345@users.noreply.github.com>
2021-08-16 14:40:53 -04:00
ameerj
a832aa699f codec: Improve libav memory alloc and cleanup 2021-08-16 14:40:53 -04:00
ameerj
bc3efb79cc codec: Fallback to CPU decoding if no compatible GPU format is found 2021-08-16 14:40:53 -04:00
lat9nq
92bc51b66a cmake: Add VDPAU and NVDEC support to FFmpeg
Adds {h264_,vp9_}{nvdec,vdpau} hwaccels.
2021-08-16 14:40:52 -04:00
ameerj
537c6ac8fe vk_blit_screen: Fix non-accelerated texture size calculation
Addresses the potential OOB access in UnswizzleTexture.
2021-08-16 14:28:10 -04:00
Fernando S
521e6ac174 Merge pull request #6863 from spholz/fix-lan-play
Fix LAN Play
2021-08-16 17:10:00 +02:00
BreadFish64
14e93f133a kernel: Optimize GetHostThreadID 2021-08-16 07:30:23 -05:00
Sönke Holz
356dbf4d1d network_interface: correct formatting 2021-08-16 12:18:19 +02:00
spholz
dc47b5a5bf network_interface: fix mingw-w64 build 2021-08-16 12:06:35 +02:00
Sönke Holz
70419f7a17 network: retrieve subnet mask and gateway info 2021-08-16 10:32:25 +02:00
Merry
af6290ed12 dynarmic: Update and enable DYNARMIC_IGNORE_ASSERTS 2021-08-15 19:33:02 +01:00
Merry
1770503185 xbyak: Update include path 2021-08-15 19:26:38 +01:00
bunnei
87d63b858a Merge pull request #6861 from yzct12345/const-mempy-is-all-the-speed
decoders: Optimize memcpy for the other functions
2021-08-15 02:38:12 -07:00
bunnei
bdd617da03 Merge pull request #6868 from yzct12345/safe-threads-no-deadlocks
threadsafe_queue: Fix deadlock
2021-08-14 02:28:59 -07:00
bunnei
aef0ca6f0d core: hle: kernel: Disable dispatch count tracking on single core.
- This would have limited value, and would be a mess to handle properly.
2021-08-14 02:14:19 -07:00
yzct12345
0ba521e634 threadsafe_queue: Fix deadlock
This fixes a lost wakeup in SPSCQueue. If the reader is in just the right position, the writer's notification will be lost and this will be a problem if the writer then does something to wait on the reader.

This was discovered to affect my upcoming stacktrace PR. I don't think any performance decrease will be noticeable because an uncontended mutex is smart enough to skip the syscall. This PR might also resolve some rare deadlocks but I don't know of any examples.
2021-08-13 19:22:51 +00:00
yzct12345
81ed54d13e logging: Display backtrace on crash
This implements backtraces so we don't have to tell users how to use gdb anymore.

This prints a backtrace after abort or segfault is detected. It also fixes the log getting cut off with the last line containing only a bracket. This change lets us know what caused a crash not just what happened the few seconds before it.

I only know how to add support for Linux with GCC. Also this doesn't work outside of C/C++ such as in dynarmic or certain parts of graphics drivers. The good thing is that it'll try and just crash again but the stack frames are still there so the core dump will work just like before.
2021-08-13 18:58:35 +00:00
yzct12345
001675dced logging: Simplify and make thread-safe
This simplifies the logging system.

This also fixes some lost messages on startup.

The simplification is simple. I removed unused functions and moved most things in the .h to the .cpp. I replaced the unnecessary linked list with its contents laid out as three member variables. Anything that went through the linked list now directly accesses the backends. Generic functions are replaced with those for each specific use case and there aren't many. This change increases coupling but we gain back more KISS and encapsulation.

With those changes it was easy to make it thread-safe. I just removed the mutex and turned a boolean atomic. I was planning to use this thread-safety in my next PR about stacktraces. It was actually async-signal-safety at first but I ended up using a different approach. Anyway getting rid of the linked list is important for that because have the list of backends constantly changing complicates things.
2021-08-13 18:39:45 +00:00
Sönke Holz
068c66672d configuration: fix mingw-w64 build 2021-08-13 12:39:14 +02:00
spholz
deb65a5717 network: don't use reinterpret_cast in GetAvailableNetworkInterfaces 2021-08-13 11:58:34 +02:00
Sönke Holz
e660334a21 network: fix mingw-w64 build
The header "combaseapi.h" of mingw-w64 defines "interface" as "struct".
2021-08-13 11:23:50 +02:00
Sönke Holz
b18e1d031f network: don't use assert to check if no network interfaces are returned 2021-08-13 11:21:34 +02:00
bunnei
71d8d84b59 Merge pull request #6862 from german77/badsdl
input_common: Disable sdl raw input mode
2021-08-12 21:14:26 -07:00
Sönke Holz
0476751ee2 configuration: move network_interface include to source file 2021-08-13 02:48:39 +02:00
Sönke Holz
a0c4c1a23a network: use Common::BitCast instead of std::bit_cast 2021-08-13 01:28:14 +02:00
Sönke Holz
8513e59431 network: narrow down scope of "result" in win32 code for
GetAvailableNetworkInterfaces
2021-08-13 00:37:03 +02:00
Sönke Holz
04ec426201 configuration: use tr instead of QStringLiteral for "None" item in
network interface combobox
2021-08-13 00:34:04 +02:00
Sönke Holz
771de32af1 network: use explicit bool conversions in GetAvailableNetworkInterfaces 2021-08-13 00:31:33 +02:00
Sönke Holz
765e97c347 network: initialize ip_addr in GetHostIPv4Address() 2021-08-13 00:28:44 +02:00
Sönke Holz
acca8aca8c nifm: use operator*() instead of .value() to get value of std::optional 2021-08-13 00:24:33 +02:00
Sönke Holz
970d81abfc nifm: treat a missing host IP address as a non-critical error 2021-08-13 00:21:54 +02:00
spholz
78a8249593 Merge branch 'yuzu-emu:master' into fix-lan-play 2021-08-12 22:27:17 +02:00
Sönke Holz
21743daf38 network: correct formatting in network.cpp and network_interface.cpp 2021-08-12 22:15:48 +02:00
spholz
1e98e73828 configuration: add option to select network interface
This commit renames the "Services" tab to "Network" and adds a combobox that allows the user to select the network interface that yuzu should use. This new setting is now used to get the local IP address in Network::GetHostIPv4Address. This prevents yuzu from selecting the wrong network interface and thus using the wrong IP address. The return type of Network::GetHostIPv4Adress has also been changed.
2021-08-12 21:32:53 +02:00
bunnei
0509fe3377 Merge pull request #6838 from ameerj/sws-align
vic: Specify sws_scale height stride.
2021-08-12 11:28:33 -07:00
german77
2a2f0bfe9e input_common: Disable sdl raw input mode 2021-08-12 13:17:07 -05:00
ameerj
356e10898f codec: Replace deprecated av_init_packet usage 2021-08-12 01:28:01 -04:00
ameerj
0be4e402e2 cmake: Always find LIBVA, update windows FFmpeg version
Allows the use of VAAPI gpu decoders on system installed ffmpeg as well.
2021-08-12 01:28:01 -04:00
ameerj
659039ca6d nvdec: Implement GPU accelerated decoding for all platforms
Supplements the VAAPI intel gpu decoder by implementing the D3D11VA decoder for Windows, and CUVID/VDPAU for Nvidia and AMD on drivers linux respectively.
2021-08-12 01:28:01 -04:00
yzct12345
430255caf8 decoders: Templates allow memcpy optimizations 2021-08-12 04:45:25 +00:00
Mai M
043904bae1 Merge pull request #6855 from german77/sdl16
externals: Update sdl2 to 2.0.16
2021-08-11 23:14:53 -04:00
Mai M
756d76d971 Merge pull request #6860 from lat9nq/ranged-settings-2
settings: Fix MSVC issues
2021-08-11 17:53:09 -04:00
german77
fe2e710003 externals: Update sdl2 to 2.0.16 2021-08-10 19:16:30 -05:00
ameerj
a779cede7c vic: Specify sws_scale height stride.
Silences a sws_scale runtime warning about unaligned strides.
2021-08-09 23:24:16 -04:00
bunnei
5060a97210 core: hle: kernel: k_thread: Mark KScopedDisableDispatch as nodiscard. 2021-08-07 12:33:31 -07:00
bunnei
9e3d1d865c core: cpu_manager: Use invalid core_id on init and simplify shutdown. 2021-08-07 12:33:07 -07:00
bunnei
99bc49e76e core: hle: service: buffer_queue: Improve management of KEvent. 2021-08-07 12:18:48 -07:00
bunnei
48a3496b93 core: hle: kernel: k_auto_object: Add GetName method.
- Useful purely for debugging.
2021-08-07 12:18:48 -07:00
bunnei
36cf96857e core: hle: service: nvflinger/vi: Improve management of KEvent. 2021-08-07 12:18:47 -07:00
bunnei
5051d3c415 core: hle: kernel: DisableDispatch on suspend threads. 2021-08-07 12:18:47 -07:00
bunnei
1798c3b6b0 core: hle: kernel: k_scheduler: Improve DisableScheduling and EnableScheduling. 2021-08-07 12:18:47 -07:00
bunnei
cbe4e32d38 core: cpu_manager: Use KScopedDisableDispatch. 2021-08-07 12:18:47 -07:00
bunnei
2dfb07388a core: hle: kernel: Use CurrentPhysicalCoreIndex as appropriate. 2021-08-07 12:18:47 -07:00
bunnei
d1c502720d core: hle: kernel: k_scheduler: Remove unnecessary MakeCurrentProcess. 2021-08-07 12:18:47 -07:00
bunnei
77ad64b97d core: hle: kernel: k_scheduler: Improve ScheduleImpl. 2021-08-07 12:18:47 -07:00
bunnei
bedcf19710 core: hle: kernel: k_scheduler: Improve Unload. 2021-08-07 12:18:47 -07:00
bunnei
7569d6774d core: hle: kernel: k_process: DisableDispatch on main thread. 2021-08-07 12:18:47 -07:00
bunnei
f2b0d28983 core: hle: kernel: k_handle_table: Use KScopedDisableDispatch as necessary. 2021-08-07 12:18:47 -07:00
bunnei
01af2f4162 core: hle: kernel: k_thread: Add KScopedDisableDispatch. 2021-08-07 12:18:47 -07:00
bunnei
2b9560428b core: hle: kernel: Ensure idle threads are closed before destroying scheduler. 2021-08-07 12:18:47 -07:00
bunnei
68eee94875 core: hle: kernel: Reflect non-emulated threads as core 3. 2021-08-07 12:18:47 -07:00
bunnei
5ea0d3629a core: cpu_manager: Use jthread. 2021-08-07 12:18:47 -07:00
spholz
33ebe471e8 Merge branch 'yuzu-emu:master' into fix-lan-play 2021-08-07 02:55:19 +02:00
Sönke Holz
ddeb8d854e network: GetAndLogLastError: ignore Errno::AGAIN
If non-blocking sockets are used, they generate a lot of Errno::AGAIN errors when they didn't receive any data. These errors shouldn't be logged.
2021-08-07 02:54:25 +02:00
Sönke Holz
dd5c41b5a6 network: GetCurrentIpConfigInfo: return host IP address
Service::NIFM::IGeneralService::GetCurrentIpConfigInfo currently hardcodes 192.168.1.100 as the IP address, which prevents LAN play from working correctly.
2021-08-07 02:17:02 +02:00
Sönke Holz
652e5e3df0 network: fix fcntl cmds
F_SETFL/F_GETFL are the correct commands to set a socket to be non-blocking
2021-08-06 21:08:31 +02:00
127 changed files with 2558 additions and 1188 deletions

View File

@@ -376,7 +376,7 @@ if (ENABLE_SDL2)
if (YUZU_USE_BUNDLED_SDL2)
# Detect toolchain and platform
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
set(SDL2_VER "SDL2-2.0.15-prerelease")
set(SDL2_VER "SDL2-2.0.16")
else()
message(FATAL_ERROR "No bundled SDL2 binaries for your toolchain. Disable YUZU_USE_BUNDLED_SDL2 and provide your own.")
endif()
@@ -396,7 +396,7 @@ if (ENABLE_SDL2)
elseif (YUZU_USE_EXTERNAL_SDL2)
message(STATUS "Using SDL2 from externals.")
else()
find_package(SDL2 2.0.15 REQUIRED)
find_package(SDL2 2.0.16 REQUIRED)
# Some installations don't set SDL2_LIBRARIES
if("${SDL2_LIBRARIES}" STREQUAL "")
@@ -518,6 +518,10 @@ set(FFmpeg_COMPONENTS
avutil
swscale)
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
Include(FindPkgConfig REQUIRED)
pkg_check_modules(LIBVA libva)
endif()
if (NOT YUZU_USE_BUNDLED_FFMPEG)
# Use system installed FFmpeg
find_package(FFmpeg QUIET COMPONENTS ${FFmpeg_COMPONENTS})
@@ -540,6 +544,9 @@ endif()
if (YUZU_USE_BUNDLED_FFMPEG)
if (NOT WIN32)
# TODO(lat9nq): Move this to externals/ffmpeg/CMakeLists.txt (and move externals/ffmpeg to
# externals/ffmpeg/ffmpeg)
# Build FFmpeg from externals
message(STATUS "Using FFmpeg from externals")
@@ -579,20 +586,23 @@ if (YUZU_USE_BUNDLED_FFMPEG)
CACHE PATH "Paths to FFmpeg libraries" FORCE)
endforeach()
set(FFmpeg_INCLUDE_DIR
"${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR}"
CACHE PATH "Path to FFmpeg headers" FORCE)
Include(FindPkgConfig REQUIRED)
pkg_check_modules(LIBVA libva)
pkg_check_modules(CUDA cuda)
pkg_check_modules(FFNVCODEC ffnvcodec)
pkg_check_modules(VDPAU vdpau)
set(FFmpeg_HWACCEL_LIBRARIES)
set(FFmpeg_HWACCEL_FLAGS)
set(FFmpeg_HWACCEL_INCLUDE_DIRS)
set(FFmpeg_HWACCEL_LDFLAGS)
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
Include(FindPkgConfig REQUIRED)
pkg_check_modules(LIBVA libva)
endif()
if(LIBVA_FOUND)
pkg_check_modules(LIBDRM libdrm REQUIRED)
find_package(X11 REQUIRED)
pkg_check_modules(LIBVA-DRM libva-drm REQUIRED)
pkg_check_modules(LIBVA-X11 libva-x11 REQUIRED)
set(FFmpeg_LIBVA_LIBRARIES
list(APPEND FFmpeg_HWACCEL_LIBRARIES
${LIBDRM_LIBRARIES}
${X11_LIBRARIES}
${LIBVA-DRM_LIBRARIES}
@@ -602,11 +612,56 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--enable-hwaccel=h264_vaapi
--enable-hwaccel=vp9_vaapi
--enable-libdrm)
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
${LIBDRM_INCLUDE_DIRS}
${X11_INCLUDE_DIRS}
${LIBVA-DRM_INCLUDE_DIRS}
${LIBVA-X11_INCLUDE_DIRS}
${LIBVA_INCLUDE_DIRS}
)
message(STATUS "VA-API found")
else()
set(FFmpeg_HWACCEL_FLAGS --disable-vaapi)
endif()
if (FFNVCODEC_FOUND AND CUDA_FOUND)
list(APPEND FFmpeg_HWACCEL_FLAGS
--enable-cuvid
--enable-ffnvcodec
--enable-nvdec
--enable-hwaccel=h264_nvdec
--enable-hwaccel=vp9_nvdec
--extra-cflags=-I${CUDA_INCLUDE_DIRS}
)
list(APPEND FFmpeg_HWACCEL_LIBRARIES
${FFNVCODEC_LIBRARIES}
${CUDA_LIBRARIES}
)
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
${FFNVCODEC_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS}
)
list(APPEND FFmpeg_HWACCEL_LDFLAGS
${FFNVCODEC_LDFLAGS}
${CUDA_LDFLAGS}
)
message(STATUS "ffnvcodec libraries version ${FFNVCODEC_VERSION} found")
endif()
if (VDPAU_FOUND)
list(APPEND FFmpeg_HWACCEL_FLAGS
--enable-vdpau
--enable-hwaccel=h264_vdpau
--enable-hwaccel=vp9_vdpau
)
list(APPEND FFmpeg_HWACCEL_LIBRARIES ${VDPAU_LIBRARIES})
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS ${VDPAU_INCLUDE_DIRS})
list(APPEND FFmpeg_HWACCEL_LDFLAGS ${VDPAU_LDFLAGS})
message(STATUS "vdpau libraries version ${VDPAU_VERSION} found")
else()
list(APPEND FFmpeg_HWACCEL_FLAGS --disable-vdpau)
endif()
# `configure` parameters builds only exactly what yuzu needs from FFmpeg
# `--disable-vdpau` is needed to avoid linking issues
add_custom_command(
@@ -624,7 +679,6 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--disable-network
--disable-postproc
--disable-swresample
--disable-vdpau
--enable-decoder=h264
--enable-decoder=vp9
--cc="${CMAKE_C_COMPILER}"
@@ -653,15 +707,26 @@ if (YUZU_USE_BUNDLED_FFMPEG)
${FFmpeg_BUILD_DIR}
)
set(FFmpeg_INCLUDE_DIR
"${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR};${FFmpeg_HWACCEL_INCLUDE_DIRS}"
CACHE PATH "Path to FFmpeg headers" FORCE)
set(FFmpeg_LDFLAGS
"${FFmpeg_HWACCEL_LDFLAGS}"
CACHE STRING "FFmpeg linker flags" FORCE)
# ALL makes this custom target build every time
# but it won't actually build if the DEPENDS parameter is up to date
add_custom_target(ffmpeg-configure ALL DEPENDS ${FFmpeg_MAKEFILE})
add_custom_target(ffmpeg-build ALL DEPENDS ${FFmpeg_BUILD_LIBRARIES} ffmpeg-configure)
link_libraries(${FFmpeg_LIBVA_LIBRARIES})
set(FFmpeg_LIBRARIES ${FFmpeg_LIBVA_LIBRARIES} ${FFmpeg_BUILD_LIBRARIES}
set(FFmpeg_LIBRARIES ${FFmpeg_BUILD_LIBRARIES} ${FFmpeg_HWACCEL_LIBRARIES}
CACHE PATH "Paths to FFmpeg libraries" FORCE)
unset(FFmpeg_BUILD_LIBRARIES)
unset(FFmpeg_LIBVA_LIBRARIES)
unset(FFmpeg_HWACCEL_FLAGS)
unset(FFmpeg_HWACCEL_INCLUDE_DIRS)
unset(FFmpeg_HWACCEL_LDFLAGS)
unset(FFmpeg_HWACCEL_LIBRARIES)
if (FFmpeg_FOUND)
message(STATUS "Found FFmpeg version ${FFmpeg_VERSION}")
@@ -670,12 +735,13 @@ if (YUZU_USE_BUNDLED_FFMPEG)
endif()
else() # WIN32
# Use yuzu FFmpeg binaries
set(FFmpeg_EXT_NAME "ffmpeg-4.3.1")
set(FFmpeg_EXT_NAME "ffmpeg-4.4")
set(FFmpeg_PATH "${CMAKE_BINARY_DIR}/externals/${FFmpeg_EXT_NAME}")
download_bundled_external("ffmpeg/" ${FFmpeg_EXT_NAME} "")
set(FFmpeg_FOUND YES)
set(FFmpeg_INCLUDE_DIR "${FFmpeg_PATH}/include" CACHE PATH "Path to FFmpeg headers" FORCE)
set(FFmpeg_LIBRARY_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg library directory" FORCE)
set(FFmpeg_LDFLAGS "" CACHE STRING "FFmpeg linker flags" FORCE)
set(FFmpeg_DLL_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg dll's" FORCE)
set(FFmpeg_LIBRARIES
${FFmpeg_LIBRARY_DIR}/swscale.lib
@@ -701,7 +767,7 @@ if (APPLE)
elseif (WIN32)
# WSAPoll and SHGetKnownFolderPath (AppData/Roaming) didn't exist before WinNT 6.x (Vista)
add_definitions(-D_WIN32_WINNT=0x0600 -DWINVER=0x0600)
set(PLATFORM_LIBRARIES winmm ws2_32)
set(PLATFORM_LIBRARIES winmm ws2_32 iphlpapi)
if (MINGW)
# PSAPI is the Process Status API
set(PLATFORM_LIBRARIES ${PLATFORM_LIBRARIES} psapi imm32 version)

View File

@@ -7,7 +7,9 @@ include(DownloadExternals)
# xbyak
if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
add_library(xbyak INTERFACE)
target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/xbyak/xbyak DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
target_include_directories(xbyak SYSTEM INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
endif()
@@ -19,6 +21,7 @@ target_include_directories(catch-single-include INTERFACE catch/single_include)
if (ARCHITECTURE_x86_64)
set(DYNARMIC_TESTS OFF)
set(DYNARMIC_NO_BUNDLED_FMT ON)
set(DYNARMIC_IGNORE_ASSERTS ON CACHE BOOL "" FORCE)
add_subdirectory(dynarmic)
endif()

2
externals/SDL vendored

View File

@@ -53,6 +53,8 @@ add_library(common STATIC
div_ceil.h
dynamic_library.cpp
dynamic_library.h
error.cpp
error.h
fiber.cpp
fiber.h
fs/file.cpp
@@ -88,7 +90,6 @@ add_library(common STATIC
microprofile.cpp
microprofile.h
microprofileui.h
misc.cpp
nvidia_flags.cpp
nvidia_flags.h
page_table.cpp

View File

@@ -4,9 +4,8 @@
#pragma once
#include <algorithm>
#include <array>
#include <string>
#include <iterator>
#if !defined(ARCHITECTURE_x86_64)
#include <cstdlib> // for exit
@@ -49,16 +48,6 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
#endif // _MSC_VER ndef
// Generic function to get last error message.
// Call directly after the command or use the error num.
// This function might change the error code.
// Defined in misc.cpp.
[[nodiscard]] std::string GetLastErrorMsg();
// Like GetLastErrorMsg(), but passing an explicit error code.
// Defined in misc.cpp.
[[nodiscard]] std::string NativeErrorToString(int e);
#define DECLARE_ENUM_FLAG_OPERATORS(type) \
[[nodiscard]] constexpr type operator|(type a, type b) noexcept { \
using T = std::underlying_type_t<type>; \
@@ -72,6 +61,14 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
using T = std::underlying_type_t<type>; \
return static_cast<type>(static_cast<T>(a) ^ static_cast<T>(b)); \
} \
[[nodiscard]] constexpr type operator<<(type a, type b) noexcept { \
using T = std::underlying_type_t<type>; \
return static_cast<type>(static_cast<T>(a) << static_cast<T>(b)); \
} \
[[nodiscard]] constexpr type operator>>(type a, type b) noexcept { \
using T = std::underlying_type_t<type>; \
return static_cast<type>(static_cast<T>(a) >> static_cast<T>(b)); \
} \
constexpr type& operator|=(type& a, type b) noexcept { \
a = a | b; \
return a; \
@@ -84,6 +81,14 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
a = a ^ b; \
return a; \
} \
constexpr type& operator<<=(type& a, type b) noexcept { \
a = a << b; \
return a; \
} \
constexpr type& operator>>=(type& a, type b) noexcept { \
a = a >> b; \
return a; \
} \
[[nodiscard]] constexpr type operator~(type key) noexcept { \
using T = std::underlying_type_t<type>; \
return static_cast<type>(~static_cast<T>(key)); \

View File

@@ -10,7 +10,9 @@
#include <cstring>
#endif
#include "common/common_funcs.h"
#include "common/error.h"
namespace Common {
std::string NativeErrorToString(int e) {
#ifdef _WIN32
@@ -50,3 +52,5 @@ std::string GetLastErrorMsg() {
return NativeErrorToString(errno);
#endif
}
} // namespace Common

21
src/common/error.h Normal file
View File

@@ -0,0 +1,21 @@
// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
namespace Common {
// Generic function to get last error message.
// Call directly after the command or use the error num.
// This function might change the error code.
// Defined in error.cpp.
[[nodiscard]] std::string GetLastErrorMsg();
// Like GetLastErrorMsg(), but passing an explicit error code.
// Defined in error.cpp.
[[nodiscard]] std::string NativeErrorToString(int e);
} // namespace Common

View File

@@ -2,13 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <atomic>
#include <chrono>
#include <climits>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <exception>
#include <thread>
#include <vector>
@@ -16,28 +13,174 @@
#include <windows.h> // For OutputDebugStringW
#endif
#include "common/assert.h"
#include "common/fs/file.h"
#include "common/fs/fs.h"
#include "common/fs/fs_paths.h"
#include "common/fs/path_util.h"
#include "common/literals.h"
#include "common/thread.h"
#include "common/logging/backend.h"
#include "common/logging/log.h"
#include "common/logging/text_formatter.h"
#include "common/settings.h"
#ifdef _WIN32
#include "common/string_util.h"
#endif
#include "common/threadsafe_queue.h"
namespace Common::Log {
namespace {
/**
* Interface for logging backends.
*/
class Backend {
public:
virtual ~Backend() = default;
virtual void Write(const Entry& entry) = 0;
virtual void EnableForStacktrace() = 0;
virtual void Flush() = 0;
};
/**
* Backend that writes to stderr and with color
*/
class ColorConsoleBackend final : public Backend {
public:
explicit ColorConsoleBackend() = default;
~ColorConsoleBackend() override = default;
void Write(const Entry& entry) override {
if (enabled.load(std::memory_order_relaxed)) {
PrintColoredMessage(entry);
}
}
void Flush() override {
// stderr shouldn't be buffered
}
void EnableForStacktrace() override {
enabled = true;
}
void SetEnabled(bool enabled_) {
enabled = enabled_;
}
private:
std::atomic_bool enabled{false};
};
/**
* Backend that writes to a file passed into the constructor
*/
class FileBackend final : public Backend {
public:
explicit FileBackend(const std::filesystem::path& filename) {
auto old_filename = filename;
old_filename += ".old.txt";
// Existence checks are done within the functions themselves.
// We don't particularly care if these succeed or not.
static_cast<void>(FS::RemoveFile(old_filename));
static_cast<void>(FS::RenameFile(filename, old_filename));
file = std::make_unique<FS::IOFile>(filename, FS::FileAccessMode::Write,
FS::FileType::TextFile);
}
~FileBackend() override = default;
void Write(const Entry& entry) override {
if (!enabled) {
return;
}
bytes_written += file->WriteString(FormatLogMessage(entry).append(1, '\n'));
using namespace Common::Literals;
// Prevent logs from exceeding a set maximum size in the event that log entries are spammed.
const auto write_limit = Settings::values.extended_logging ? 1_GiB : 100_MiB;
const bool write_limit_exceeded = bytes_written > write_limit;
if (entry.log_level >= Level::Error || write_limit_exceeded) {
if (write_limit_exceeded) {
// Stop writing after the write limit is exceeded.
// Don't close the file so we can print a stacktrace if necessary
enabled = false;
}
file->Flush();
}
}
void Flush() override {
file->Flush();
}
void EnableForStacktrace() override {
enabled = true;
bytes_written = 0;
}
private:
std::unique_ptr<FS::IOFile> file;
bool enabled = true;
std::size_t bytes_written = 0;
};
/**
* Backend that writes to Visual Studio's output window
*/
class DebuggerBackend final : public Backend {
public:
explicit DebuggerBackend() = default;
~DebuggerBackend() override = default;
void Write(const Entry& entry) override {
#ifdef _WIN32
::OutputDebugStringW(UTF8ToUTF16W(FormatLogMessage(entry).append(1, '\n')).c_str());
#endif
}
void Flush() override {}
void EnableForStacktrace() override {}
};
bool initialization_in_progress_suppress_logging = true;
/**
* Static state as a singleton.
*/
class Impl {
public:
static Impl& Instance() {
static Impl backend;
return backend;
if (!instance) {
throw std::runtime_error("Using Logging instance before its initialization");
}
return *instance;
}
static void Initialize() {
if (instance) {
LOG_WARNING(Log, "Reinitializing logging backend");
return;
}
using namespace Common::FS;
const auto& log_dir = GetYuzuPath(YuzuPath::LogDir);
void(CreateDir(log_dir));
Filter filter;
filter.ParseFilterString(Settings::values.log_filter.GetValue());
instance = std::unique_ptr<Impl, decltype(&Deleter)>(new Impl(log_dir / LOG_FILE, filter),
Deleter);
initialization_in_progress_suppress_logging = false;
}
Impl(const Impl&) = delete;
@@ -46,74 +189,54 @@ public:
Impl(Impl&&) = delete;
Impl& operator=(Impl&&) = delete;
void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
const char* function, std::string message) {
message_queue.Push(
CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
}
void AddBackend(std::unique_ptr<Backend> backend) {
std::lock_guard lock{writing_mutex};
backends.push_back(std::move(backend));
}
void RemoveBackend(std::string_view backend_name) {
std::lock_guard lock{writing_mutex};
std::erase_if(backends, [&backend_name](const auto& backend) {
return backend_name == backend->GetName();
});
}
const Filter& GetGlobalFilter() const {
return filter;
}
void SetGlobalFilter(const Filter& f) {
filter = f;
}
Backend* GetBackend(std::string_view backend_name) {
const auto it =
std::find_if(backends.begin(), backends.end(),
[&backend_name](const auto& i) { return backend_name == i->GetName(); });
if (it == backends.end())
return nullptr;
return it->get();
void SetColorConsoleBackendEnabled(bool enabled) {
color_console_backend.SetEnabled(enabled);
}
void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
const char* function, std::string message) {
if (!filter.CheckMessage(log_class, log_level))
return;
const Entry& entry =
CreateEntry(log_class, log_level, filename, line_num, function, std::move(message));
message_queue.Push(entry);
}
private:
Impl() {
backend_thread = std::thread([&] {
Entry entry;
auto write_logs = [&](Entry& e) {
std::lock_guard lock{writing_mutex};
for (const auto& backend : backends) {
backend->Write(e);
}
};
while (true) {
entry = message_queue.PopWait();
if (entry.final_entry) {
break;
}
write_logs(entry);
}
// Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a
// case where a system is repeatedly spamming logs even on close.
const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100;
int logs_written = 0;
while (logs_written++ < MAX_LOGS_TO_WRITE && message_queue.Pop(entry)) {
write_logs(entry);
}
});
}
Impl(const std::filesystem::path& file_backend_filename, const Filter& filter_)
: filter{filter_}, file_backend{file_backend_filename}, backend_thread{std::thread([this] {
Common::SetCurrentThreadName("yuzu:Log");
Entry entry;
const auto write_logs = [this, &entry]() {
ForEachBackend([&entry](Backend& backend) { backend.Write(entry); });
};
while (true) {
entry = message_queue.PopWait();
if (entry.final_entry) {
break;
}
write_logs();
}
// Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a
// case where a system is repeatedly spamming logs even on close.
int max_logs_to_write = filter.IsDebug() ? INT_MAX : 100;
while (max_logs_to_write-- && message_queue.Pop(entry)) {
write_logs();
}
})} {}
~Impl() {
Entry entry;
entry.final_entry = true;
message_queue.Push(entry);
StopBackendThread();
}
void StopBackendThread() {
Entry stop_entry{};
stop_entry.final_entry = true;
message_queue.Push(stop_entry);
backend_thread.join();
}
@@ -135,100 +258,51 @@ private:
};
}
std::mutex writing_mutex;
std::thread backend_thread;
std::vector<std::unique_ptr<Backend>> backends;
MPSCQueue<Entry> message_queue;
void ForEachBackend(auto lambda) {
lambda(static_cast<Backend&>(debugger_backend));
lambda(static_cast<Backend&>(color_console_backend));
lambda(static_cast<Backend&>(file_backend));
}
static void Deleter(Impl* ptr) {
delete ptr;
}
static inline std::unique_ptr<Impl, decltype(&Deleter)> instance{nullptr, Deleter};
Filter filter;
DebuggerBackend debugger_backend{};
ColorConsoleBackend color_console_backend{};
FileBackend file_backend;
std::thread backend_thread;
MPSCQueue<Entry> message_queue{};
std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
};
} // namespace
ConsoleBackend::~ConsoleBackend() = default;
void ConsoleBackend::Write(const Entry& entry) {
PrintMessage(entry);
void Initialize() {
Impl::Initialize();
}
ColorConsoleBackend::~ColorConsoleBackend() = default;
void ColorConsoleBackend::Write(const Entry& entry) {
PrintColoredMessage(entry);
}
FileBackend::FileBackend(const std::filesystem::path& filename) {
auto old_filename = filename;
old_filename += ".old.txt";
// Existence checks are done within the functions themselves.
// We don't particularly care if these succeed or not.
FS::RemoveFile(old_filename);
void(FS::RenameFile(filename, old_filename));
file =
std::make_unique<FS::IOFile>(filename, FS::FileAccessMode::Write, FS::FileType::TextFile);
}
FileBackend::~FileBackend() = default;
void FileBackend::Write(const Entry& entry) {
if (!file->IsOpen()) {
return;
}
using namespace Common::Literals;
// Prevent logs from exceeding a set maximum size in the event that log entries are spammed.
constexpr std::size_t MAX_BYTES_WRITTEN = 100_MiB;
constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1_GiB;
const bool write_limit_exceeded =
bytes_written > MAX_BYTES_WRITTEN_EXTENDED ||
(bytes_written > MAX_BYTES_WRITTEN && !Settings::values.extended_logging);
// Close the file after the write limit is exceeded.
if (write_limit_exceeded) {
file->Close();
return;
}
bytes_written += file->WriteString(FormatLogMessage(entry).append(1, '\n'));
if (entry.log_level >= Level::Error) {
file->Flush();
}
}
DebuggerBackend::~DebuggerBackend() = default;
void DebuggerBackend::Write(const Entry& entry) {
#ifdef _WIN32
::OutputDebugStringW(UTF8ToUTF16W(FormatLogMessage(entry).append(1, '\n')).c_str());
#endif
void DisableLoggingInTests() {
initialization_in_progress_suppress_logging = true;
}
void SetGlobalFilter(const Filter& filter) {
Impl::Instance().SetGlobalFilter(filter);
}
void AddBackend(std::unique_ptr<Backend> backend) {
Impl::Instance().AddBackend(std::move(backend));
}
void RemoveBackend(std::string_view backend_name) {
Impl::Instance().RemoveBackend(backend_name);
}
Backend* GetBackend(std::string_view backend_name) {
return Impl::Instance().GetBackend(backend_name);
void SetColorConsoleBackendEnabled(bool enabled) {
Impl::Instance().SetColorConsoleBackendEnabled(enabled);
}
void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
unsigned int line_num, const char* function, const char* format,
const fmt::format_args& args) {
auto& instance = Impl::Instance();
const auto& filter = instance.GetGlobalFilter();
if (!filter.CheckMessage(log_class, log_level))
return;
instance.PushEntry(log_class, log_level, filename, line_num, function,
fmt::vformat(format, args));
if (!initialization_in_progress_suppress_logging) {
Impl::Instance().PushEntry(log_class, log_level, filename, line_num, function,
fmt::vformat(format, args));
}
}
} // namespace Common::Log

View File

@@ -5,120 +5,21 @@
#pragma once
#include <filesystem>
#include <memory>
#include <string>
#include <string_view>
#include "common/logging/filter.h"
#include "common/logging/log.h"
namespace Common::FS {
class IOFile;
}
namespace Common::Log {
class Filter;
/**
* Interface for logging backends. As loggers can be created and removed at runtime, this can be
* used by a frontend for adding a custom logging backend as needed
*/
class Backend {
public:
virtual ~Backend() = default;
/// Initializes the logging system. This should be the first thing called in main.
void Initialize();
virtual void SetFilter(const Filter& new_filter) {
filter = new_filter;
}
virtual const char* GetName() const = 0;
virtual void Write(const Entry& entry) = 0;
private:
Filter filter;
};
void DisableLoggingInTests();
/**
* Backend that writes to stderr without any color commands
*/
class ConsoleBackend : public Backend {
public:
~ConsoleBackend() override;
static const char* Name() {
return "console";
}
const char* GetName() const override {
return Name();
}
void Write(const Entry& entry) override;
};
/**
* Backend that writes to stderr and with color
*/
class ColorConsoleBackend : public Backend {
public:
~ColorConsoleBackend() override;
static const char* Name() {
return "color_console";
}
const char* GetName() const override {
return Name();
}
void Write(const Entry& entry) override;
};
/**
* Backend that writes to a file passed into the constructor
*/
class FileBackend : public Backend {
public:
explicit FileBackend(const std::filesystem::path& filename);
~FileBackend() override;
static const char* Name() {
return "file";
}
const char* GetName() const override {
return Name();
}
void Write(const Entry& entry) override;
private:
std::unique_ptr<FS::IOFile> file;
std::size_t bytes_written = 0;
};
/**
* Backend that writes to Visual Studio's output window
*/
class DebuggerBackend : public Backend {
public:
~DebuggerBackend() override;
static const char* Name() {
return "debugger";
}
const char* GetName() const override {
return Name();
}
void Write(const Entry& entry) override;
};
void AddBackend(std::unique_ptr<Backend> backend);
void RemoveBackend(std::string_view backend_name);
Backend* GetBackend(std::string_view backend_name);
/**
* The global filter will prevent any messages from even being processed if they are filtered. Each
* backend can have a filter, but if the level is lower than the global filter, the backend will
* never get the message
* The global filter will prevent any messages from even being processed if they are filtered.
*/
void SetGlobalFilter(const Filter& filter);
} // namespace Common::Log
void SetColorConsoleBackendEnabled(bool enabled);
} // namespace Common::Log

View File

@@ -111,6 +111,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
SUB(Service, NCM) \
SUB(Service, NFC) \
SUB(Service, NFP) \
SUB(Service, NGCT) \
SUB(Service, NIFM) \
SUB(Service, NIM) \
SUB(Service, NPNS) \

View File

@@ -81,6 +81,7 @@ enum class Class : u8 {
Service_NCM, ///< The NCM service
Service_NFC, ///< The NFC (Near-field communication) service
Service_NFP, ///< The NFP service
Service_NGCT, ///< The NGCT (No Good Content for Terra) service
Service_NIFM, ///< The NIFM (Network interface) service
Service_NIM, ///< The NIM service
Service_NPNS, ///< The NPNS service

140
src/common/lru_cache.h Normal file
View File

@@ -0,0 +1,140 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2+ or any later version
// Refer to the license.txt file included.
#pragma once
#include <deque>
#include <memory>
#include <type_traits>
#include "common/common_types.h"
namespace Common {
template <class Traits>
class LeastRecentlyUsedCache {
using ObjectType = typename Traits::ObjectType;
using TickType = typename Traits::TickType;
struct Item {
ObjectType obj;
TickType tick;
Item* next{};
Item* prev{};
};
public:
LeastRecentlyUsedCache() : first_item{}, last_item{} {}
~LeastRecentlyUsedCache() = default;
size_t Insert(ObjectType obj, TickType tick) {
const auto new_id = Build();
auto& item = item_pool[new_id];
item.obj = obj;
item.tick = tick;
Attach(item);
return new_id;
}
void Touch(size_t id, TickType tick) {
auto& item = item_pool[id];
if (item.tick >= tick) {
return;
}
item.tick = tick;
if (&item == last_item) {
return;
}
Detach(item);
Attach(item);
}
void Free(size_t id) {
auto& item = item_pool[id];
Detach(item);
item.prev = nullptr;
item.next = nullptr;
free_items.push_back(id);
}
template <typename Func>
void ForEachItemBelow(TickType tick, Func&& func) {
static constexpr bool RETURNS_BOOL =
std::is_same_v<std::invoke_result<Func, ObjectType>, bool>;
Item* iterator = first_item;
while (iterator) {
if (static_cast<s64>(tick) - static_cast<s64>(iterator->tick) < 0) {
return;
}
Item* next = iterator->next;
if constexpr (RETURNS_BOOL) {
if (func(iterator->obj)) {
return;
}
} else {
func(iterator->obj);
}
iterator = next;
}
}
private:
size_t Build() {
if (free_items.empty()) {
const size_t item_id = item_pool.size();
auto& item = item_pool.emplace_back();
item.next = nullptr;
item.prev = nullptr;
return item_id;
}
const size_t item_id = free_items.front();
free_items.pop_front();
auto& item = item_pool[item_id];
item.next = nullptr;
item.prev = nullptr;
return item_id;
}
void Attach(Item& item) {
if (!first_item) {
first_item = &item;
}
if (!last_item) {
last_item = &item;
} else {
item.prev = last_item;
last_item->next = &item;
item.next = nullptr;
last_item = &item;
}
}
void Detach(Item& item) {
if (item.prev) {
item.prev->next = item.next;
}
if (item.next) {
item.next->prev = item.prev;
}
if (&item == first_item) {
first_item = item.next;
if (first_item) {
first_item->prev = nullptr;
}
}
if (&item == last_item) {
last_item = item.prev;
if (last_item) {
last_item->next = nullptr;
}
}
}
std::deque<Item> item_pool;
std::deque<size_t> free_items;
Item* first_item{};
Item* last_item{};
};
} // namespace Common

View File

@@ -54,12 +54,11 @@ void LogSettings() {
log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue());
log_setting("Renderer_UseAsynchronousGpuEmulation",
values.use_asynchronous_gpu_emulation.GetValue());
log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue());
log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
log_setting("Audio_OutputEngine", values.sink_id.GetValue());
log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue());
@@ -137,13 +136,12 @@ void RestoreGlobalState(bool is_powered_on) {
values.use_disk_shader_cache.SetGlobal(true);
values.gpu_accuracy.SetGlobal(true);
values.use_asynchronous_gpu_emulation.SetGlobal(true);
values.use_nvdec_emulation.SetGlobal(true);
values.nvdec_emulation.SetGlobal(true);
values.accelerate_astc.SetGlobal(true);
values.use_vsync.SetGlobal(true);
values.shader_backend.SetGlobal(true);
values.use_asynchronous_shaders.SetGlobal(true);
values.use_fast_gpu_time.SetGlobal(true);
values.use_caches_gc.SetGlobal(true);
values.bg_red.SetGlobal(true);
values.bg_green.SetGlobal(true);
values.bg_blue.SetGlobal(true);

View File

@@ -48,6 +48,12 @@ enum class FullscreenMode : u32 {
Exclusive = 1,
};
enum class NvdecEmulation : u32 {
Off = 0,
CPU = 1,
GPU = 2,
};
/** The BasicSetting class is a simple resource manager. It defines a label and default value
* alongside the actual value of the setting for simpler and less-error prone use with frontend
* configurations. Setting a default value and label is required, though subclasses may deviate from
@@ -466,7 +472,7 @@ struct Values {
RangedSetting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, GPUAccuracy::Normal,
GPUAccuracy::Extreme, "gpu_accuracy"};
Setting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
Setting<bool> use_nvdec_emulation{true, "use_nvdec_emulation"};
Setting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
Setting<bool> accelerate_astc{true, "accelerate_astc"};
Setting<bool> use_vsync{true, "use_vsync"};
BasicRangedSetting<u16> fps_cap{1000, 1, 1000, "fps_cap"};
@@ -475,7 +481,6 @@ struct Values {
ShaderBackend::SPIRV, "shader_backend"};
Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
Setting<bool> use_caches_gc{false, "use_caches_gc"};
Setting<u8> bg_red{0, "bg_red"};
Setting<u8> bg_green{0, "bg_green"};
@@ -489,7 +494,7 @@ struct Values {
std::chrono::seconds custom_rtc_differential;
BasicSetting<s32> current_user{0, "current_user"};
RangedSetting<s32> language_index{1, 0, 16, "language_index"};
RangedSetting<s32> language_index{1, 0, 17, "language_index"};
RangedSetting<s32> region_index{1, 0, 6, "region_index"};
RangedSetting<s32> time_zone_index{0, 0, 45, "time_zone_index"};
RangedSetting<s32> sound_index{1, 0, 2, "sound_index"};
@@ -558,9 +563,10 @@ struct Values {
BasicSetting<std::string> log_filter{"*:Info", "log_filter"};
BasicSetting<bool> use_dev_keys{false, "use_dev_keys"};
// Services
// Network
BasicSetting<std::string> bcat_backend{"none", "bcat_backend"};
BasicSetting<bool> bcat_boxcat_local{false, "bcat_boxcat_local"};
BasicSetting<std::string> network_interface{std::string(), "network_interface"};
// WebService
BasicSetting<bool> enable_telemetry{true, "enable_telemetry"};

View File

@@ -2,7 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_funcs.h"
#include <string>
#include "common/error.h"
#include "common/logging/log.h"
#include "common/thread.h"
#ifdef __APPLE__
@@ -21,8 +23,6 @@
#include <unistd.h>
#endif
#include <string>
#ifdef __FreeBSD__
#define cpu_set_t cpuset_t
#endif

View File

@@ -46,15 +46,13 @@ public:
ElementPtr* new_ptr = new ElementPtr();
write_ptr->next.store(new_ptr, std::memory_order_release);
write_ptr = new_ptr;
++size;
const size_t previous_size{size++};
// Acquire the mutex and then immediately release it as a fence.
// cv_mutex must be held or else there will be a missed wakeup if the other thread is in the
// line before cv.wait
// TODO(bunnei): This can be replaced with C++20 waitable atomics when properly supported.
// See discussion on https://github.com/yuzu-emu/yuzu/pull/3173 for details.
if (previous_size == 0) {
std::lock_guard lock{cv_mutex};
}
std::lock_guard lock{cv_mutex};
cv.notify_one();
}

View File

@@ -6,7 +6,7 @@
#include <bitset>
#include <initializer_list>
#include <xbyak.h>
#include <xbyak/xbyak.h>
#include "common/assert.h"
namespace Common::X64 {

View File

@@ -5,7 +5,7 @@
#pragma once
#include <type_traits>
#include <xbyak.h>
#include <xbyak/xbyak.h>
#include "common/x64/xbyak_abi.h"
namespace Common::X64 {

View File

@@ -263,6 +263,8 @@ add_library(core STATIC
hle/service/acc/acc_u0.h
hle/service/acc/acc_u1.cpp
hle/service/acc/acc_u1.h
hle/service/acc/async_context.cpp
hle/service/acc/async_context.h
hle/service/acc/errors.h
hle/service/acc/profile_manager.cpp
hle/service/acc/profile_manager.h
@@ -452,6 +454,8 @@ add_library(core STATIC
hle/service/nfp/nfp.h
hle/service/nfp/nfp_user.cpp
hle/service/nfp/nfp_user.h
hle/service/ngct/ngct.cpp
hle/service/ngct/ngct.h
hle/service/nifm/nifm.cpp
hle/service/nifm/nifm.h
hle/service/nim/nim.cpp
@@ -636,6 +640,8 @@ add_library(core STATIC
memory.h
network/network.cpp
network/network.h
network/network_interface.cpp
network/network_interface.h
network/sockets.h
perf_stats.cpp
perf_stats.h

View File

@@ -4,6 +4,7 @@
#include <array>
#include <atomic>
#include <exception>
#include <memory>
#include <utility>
@@ -82,9 +83,13 @@ FileSys::StorageId GetStorageIdForFrontendSlot(
}
}
} // Anonymous namespace
void KProcessDeleter(Kernel::KProcess* process) {
process->Destroy();
}
/*static*/ System System::s_instance;
using KProcessPtr = std::unique_ptr<Kernel::KProcess, decltype(&KProcessDeleter)>;
} // Anonymous namespace
FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
const std::string& path) {
@@ -234,8 +239,8 @@ struct System::Impl {
}
telemetry_session->AddInitialInfo(*app_loader, fs_controller, *content_provider);
auto main_process = Kernel::KProcess::Create(system.Kernel());
ASSERT(Kernel::KProcess::Initialize(main_process, system, "main",
main_process = KProcessPtr{Kernel::KProcess::Create(system.Kernel()), KProcessDeleter};
ASSERT(Kernel::KProcess::Initialize(main_process.get(), system, "main",
Kernel::KProcess::ProcessType::Userland)
.IsSuccess());
main_process->Open();
@@ -248,7 +253,7 @@ struct System::Impl {
static_cast<u32>(load_result));
}
AddGlueRegistrationForProcess(*app_loader, *main_process);
kernel.MakeCurrentProcess(main_process);
kernel.MakeCurrentProcess(main_process.get());
kernel.InitializeCores();
// Initialize cheat engine
@@ -317,6 +322,8 @@ struct System::Impl {
kernel.Shutdown();
memory.Reset();
applet_manager.ClearAll();
// TODO: The main process should be freed based on KAutoObject ref counting.
main_process.reset();
LOG_DEBUG(Core, "Shutdown OK");
}
@@ -375,6 +382,7 @@ struct System::Impl {
std::unique_ptr<Tegra::GPU> gpu_core;
std::unique_ptr<Hardware::InterruptManager> interrupt_manager;
std::unique_ptr<Core::DeviceMemory> device_memory;
KProcessPtr main_process{nullptr, KProcessDeleter};
Core::Memory::Memory memory;
CpuManager cpu_manager;
std::atomic_bool is_powered_on{};
@@ -425,6 +433,20 @@ struct System::Impl {
System::System() : impl{std::make_unique<Impl>(*this)} {}
System::~System() = default;
System& System::GetInstance() {
if (!s_instance) {
throw std::runtime_error("Using System instance before its initialization");
}
return *s_instance;
}
void System::InitializeGlobalInstance() {
if (s_instance) {
throw std::runtime_error("Reinitializing Global System instance.");
}
s_instance = std::unique_ptr<System>(new System);
}
CpuManager& System::GetCpuManager() {
return impl->cpu_manager;
}

View File

@@ -120,9 +120,9 @@ public:
* Gets the instance of the System singleton class.
* @returns Reference to the instance of the System singleton class.
*/
[[deprecated("Use of the global system instance is deprecated")]] static System& GetInstance() {
return s_instance;
}
[[deprecated("Use of the global system instance is deprecated")]] static System& GetInstance();
static void InitializeGlobalInstance();
/// Enumeration representing the return values of the System Initialize and Load process.
enum class ResultStatus : u32 {
@@ -396,7 +396,7 @@ private:
struct Impl;
std::unique_ptr<Impl> impl;
static System s_instance;
inline static std::unique_ptr<System> s_instance{};
};
} // namespace Core

View File

@@ -21,34 +21,25 @@ namespace Core {
CpuManager::CpuManager(System& system_) : system{system_} {}
CpuManager::~CpuManager() = default;
void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) {
cpu_manager.RunThread(core);
void CpuManager::ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager,
std::size_t core) {
cpu_manager.RunThread(stop_token, core);
}
void CpuManager::Initialize() {
running_mode = true;
if (is_multicore) {
for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
core_data[core].host_thread =
std::make_unique<std::thread>(ThreadStart, std::ref(*this), core);
core_data[core].host_thread = std::jthread(ThreadStart, std::ref(*this), core);
}
} else {
core_data[0].host_thread = std::make_unique<std::thread>(ThreadStart, std::ref(*this), 0);
core_data[0].host_thread = std::jthread(ThreadStart, std::ref(*this), 0);
}
}
void CpuManager::Shutdown() {
running_mode = false;
Pause(false);
if (is_multicore) {
for (auto& data : core_data) {
data.host_thread->join();
data.host_thread.reset();
}
} else {
core_data[0].host_thread->join();
core_data[0].host_thread.reset();
}
}
std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() {
@@ -317,7 +308,7 @@ void CpuManager::Pause(bool paused) {
}
}
void CpuManager::RunThread(std::size_t core) {
void CpuManager::RunThread(std::stop_token stop_token, std::size_t core) {
/// Initialization
system.RegisterCoreThread(core);
std::string name;
@@ -361,6 +352,10 @@ void CpuManager::RunThread(std::size_t core) {
return;
}
if (stop_token.stop_requested()) {
break;
}
auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread();
data.is_running = true;
Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext());

View File

@@ -78,9 +78,9 @@ private:
void SingleCoreRunSuspendThread();
void SingleCorePause(bool paused);
static void ThreadStart(CpuManager& cpu_manager, std::size_t core);
static void ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager, std::size_t core);
void RunThread(std::size_t core);
void RunThread(std::stop_token stop_token, std::size_t core);
struct CoreData {
std::shared_ptr<Common::Fiber> host_context;
@@ -89,7 +89,7 @@ private:
std::atomic<bool> is_running;
std::atomic<bool> is_paused;
std::atomic<bool> initialized;
std::unique_ptr<std::thread> host_thread;
std::jthread host_thread;
};
std::atomic<bool> running_mode{};

View File

@@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <string>
#include <vector>
#include "common/common_funcs.h"

View File

@@ -28,13 +28,20 @@ constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 12.1.0-1.0";
// Atmosphere version constants.
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 0;
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 19;
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 5;
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 1;
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 0;
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 0;
constexpr u32 AtmosphereTargetFirmwareWithRevision(u8 major, u8 minor, u8 micro, u8 rev) {
return u32{major} << 24 | u32{minor} << 16 | u32{micro} << 8 | u32{rev};
}
constexpr u32 AtmosphereTargetFirmware(u8 major, u8 minor, u8 micro) {
return AtmosphereTargetFirmwareWithRevision(major, minor, micro, 0);
}
constexpr u32 GetTargetFirmware() {
return u32{HOS_VERSION_MAJOR} << 24 | u32{HOS_VERSION_MINOR} << 16 |
u32{HOS_VERSION_MICRO} << 8 | 0U;
return AtmosphereTargetFirmware(HOS_VERSION_MAJOR, HOS_VERSION_MINOR, HOS_VERSION_MICRO);
}
} // namespace HLE::ApiVersion

View File

@@ -267,20 +267,23 @@ struct KernelCore::Impl {
}
}
/// Creates a new host thread ID, should only be called by GetHostThreadId
u32 AllocateHostThreadId(std::optional<std::size_t> core_id) {
if (core_id) {
// The first for slots are reserved for CPU core threads
ASSERT(*core_id < Core::Hardware::NUM_CPU_CORES);
return static_cast<u32>(*core_id);
} else {
return next_host_thread_id++;
static inline thread_local u32 host_thread_id = UINT32_MAX;
/// Gets the host thread ID for the caller, allocating a new one if this is the first time
u32 GetHostThreadId(std::size_t core_id) {
if (host_thread_id == UINT32_MAX) {
// The first four slots are reserved for CPU core threads
ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
host_thread_id = static_cast<u32>(core_id);
}
return host_thread_id;
}
/// Gets the host thread ID for the caller, allocating a new one if this is the first time
u32 GetHostThreadId(std::optional<std::size_t> core_id = std::nullopt) {
const thread_local auto host_thread_id{AllocateHostThreadId(core_id)};
u32 GetHostThreadId() {
if (host_thread_id == UINT32_MAX) {
host_thread_id = next_host_thread_id++;
}
return host_thread_id;
}

View File

@@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>

View File

@@ -1078,8 +1078,8 @@ static ResultCode GetThreadContext(Core::System& system, VAddr out_context, Hand
for (auto i = 0; i < static_cast<s32>(Core::Hardware::NUM_CPU_CORES); ++i) {
if (thread.GetPointerUnsafe() == kernel.Scheduler(i).GetCurrentThread()) {
current = true;
break;
}
break;
}
// If the thread is current, retry until it isn't.

View File

@@ -23,6 +23,7 @@
#include "core/hle/service/acc/acc_su.h"
#include "core/hle/service/acc/acc_u0.h"
#include "core/hle/service/acc/acc_u1.h"
#include "core/hle/service/acc/async_context.h"
#include "core/hle/service/acc/errors.h"
#include "core/hle/service/acc/profile_manager.h"
#include "core/hle/service/glue/arp.h"
@@ -454,22 +455,6 @@ public:
: IProfileCommon{system_, "IProfileEditor", true, user_id_, profile_manager_} {}
};
class IAsyncContext final : public ServiceFramework<IAsyncContext> {
public:
explicit IAsyncContext(Core::System& system_) : ServiceFramework{system_, "IAsyncContext"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "GetSystemEvent"},
{1, nullptr, "Cancel"},
{2, nullptr, "HasDone"},
{3, nullptr, "GetResult"},
};
// clang-format on
RegisterHandlers(functions);
}
};
class ISessionObject final : public ServiceFramework<ISessionObject> {
public:
explicit ISessionObject(Core::System& system_, Common::UUID)
@@ -504,16 +489,44 @@ public:
}
};
class EnsureTokenIdCacheAsyncInterface final : public IAsyncContext {
public:
explicit EnsureTokenIdCacheAsyncInterface(Core::System& system_) : IAsyncContext{system_} {
MarkComplete();
}
~EnsureTokenIdCacheAsyncInterface() = default;
void LoadIdTokenCache(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_ACC, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
}
protected:
bool IsComplete() const override {
return true;
}
void Cancel() override {}
ResultCode GetResult() const override {
return ResultSuccess;
}
};
class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
public:
explicit IManagerForApplication(Core::System& system_, Common::UUID user_id_)
: ServiceFramework{system_, "IManagerForApplication"}, user_id{user_id_} {
: ServiceFramework{system_, "IManagerForApplication"},
ensure_token_id{std::make_shared<EnsureTokenIdCacheAsyncInterface>(system)},
user_id{user_id_} {
// clang-format off
static const FunctionInfo functions[] = {
{0, &IManagerForApplication::CheckAvailability, "CheckAvailability"},
{1, &IManagerForApplication::GetAccountId, "GetAccountId"},
{2, nullptr, "EnsureIdTokenCacheAsync"},
{3, nullptr, "LoadIdTokenCache"},
{2, &IManagerForApplication::EnsureIdTokenCacheAsync, "EnsureIdTokenCacheAsync"},
{3, &IManagerForApplication::LoadIdTokenCache, "LoadIdTokenCache"},
{130, &IManagerForApplication::GetNintendoAccountUserResourceCacheForApplication, "GetNintendoAccountUserResourceCacheForApplication"},
{150, nullptr, "CreateAuthorizationRequest"},
{160, &IManagerForApplication::StoreOpenContext, "StoreOpenContext"},
@@ -540,6 +553,20 @@ private:
rb.PushRaw<u64>(user_id.GetNintendoID());
}
void EnsureIdTokenCacheAsync(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_ACC, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(ResultSuccess);
rb.PushIpcInterface(ensure_token_id);
}
void LoadIdTokenCache(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_ACC, "(STUBBED) called");
ensure_token_id->LoadIdTokenCache(ctx);
}
void GetNintendoAccountUserResourceCacheForApplication(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_ACC, "(STUBBED) called");
@@ -562,6 +589,7 @@ private:
rb.Push(ResultSuccess);
}
std::shared_ptr<EnsureTokenIdCacheAsyncInterface> ensure_token_id{};
Common::UUID user_id{Common::INVALID_UUID};
};

View File

@@ -0,0 +1,68 @@
// Copyright 2021 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/service/acc/async_context.h"
namespace Service::Account {
IAsyncContext::IAsyncContext(Core::System& system_)
: ServiceFramework{system_, "IAsyncContext"}, compeletion_event{system_.Kernel()} {
Kernel::KAutoObject::Create(std::addressof(compeletion_event));
compeletion_event.Initialize("IAsyncContext:CompletionEvent");
// clang-format off
static const FunctionInfo functions[] = {
{0, &IAsyncContext::GetSystemEvent, "GetSystemEvent"},
{1, &IAsyncContext::Cancel, "Cancel"},
{2, &IAsyncContext::HasDone, "HasDone"},
{3, &IAsyncContext::GetResult, "GetResult"},
};
// clang-format on
RegisterHandlers(functions);
}
void IAsyncContext::GetSystemEvent(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called");
IPC::ResponseBuilder rb{ctx, 2, 1};
rb.Push(ResultSuccess);
rb.PushCopyObjects(compeletion_event.GetReadableEvent());
}
void IAsyncContext::Cancel(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called");
Cancel();
MarkComplete();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
}
void IAsyncContext::HasDone(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called");
is_complete.store(IsComplete());
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
rb.Push(is_complete.load());
}
void IAsyncContext::GetResult(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(GetResult());
}
void IAsyncContext::MarkComplete() {
is_complete.store(true);
compeletion_event.GetWritableEvent().Signal();
}
} // namespace Service::Account

View File

@@ -0,0 +1,37 @@
// Copyright 2021 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include "core/hle/kernel/k_event.h"
#include "core/hle/service/service.h"
namespace Core {
class System;
}
namespace Service::Account {
class IAsyncContext : public ServiceFramework<IAsyncContext> {
public:
explicit IAsyncContext(Core::System& system_);
void GetSystemEvent(Kernel::HLERequestContext& ctx);
void Cancel(Kernel::HLERequestContext& ctx);
void HasDone(Kernel::HLERequestContext& ctx);
void GetResult(Kernel::HLERequestContext& ctx);
protected:
virtual bool IsComplete() const = 0;
virtual void Cancel() = 0;
virtual ResultCode GetResult() const = 0;
void MarkComplete();
std::atomic<bool> is_complete{false};
Kernel::KEvent compeletion_event;
};
} // namespace Service::Account

View File

@@ -1270,7 +1270,8 @@ void ILibraryAppletCreator::CreateHandleStorage(Kernel::HLERequestContext& ctx)
IApplicationFunctions::IApplicationFunctions(Core::System& system_)
: ServiceFramework{system_, "IApplicationFunctions"}, gpu_error_detected_event{system.Kernel()},
friend_invitation_storage_channel_event{system.Kernel()},
health_warning_disappeared_system_event{system.Kernel()} {
notification_storage_channel_event{system.Kernel()}, health_warning_disappeared_system_event{
system.Kernel()} {
// clang-format off
static const FunctionInfo functions[] = {
{1, &IApplicationFunctions::PopLaunchParameter, "PopLaunchParameter"},
@@ -1322,7 +1323,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
{131, nullptr, "SetDelayTimeToAbortOnGpuError"},
{140, &IApplicationFunctions::GetFriendInvitationStorageChannelEvent, "GetFriendInvitationStorageChannelEvent"},
{141, &IApplicationFunctions::TryPopFromFriendInvitationStorageChannel, "TryPopFromFriendInvitationStorageChannel"},
{150, nullptr, "GetNotificationStorageChannelEvent"},
{150, &IApplicationFunctions::GetNotificationStorageChannelEvent, "GetNotificationStorageChannelEvent"},
{151, nullptr, "TryPopFromNotificationStorageChannel"},
{160, &IApplicationFunctions::GetHealthWarningDisappearedSystemEvent, "GetHealthWarningDisappearedSystemEvent"},
{170, nullptr, "SetHdcpAuthenticationActivated"},
@@ -1340,11 +1341,14 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
Kernel::KAutoObject::Create(std::addressof(gpu_error_detected_event));
Kernel::KAutoObject::Create(std::addressof(friend_invitation_storage_channel_event));
Kernel::KAutoObject::Create(std::addressof(notification_storage_channel_event));
Kernel::KAutoObject::Create(std::addressof(health_warning_disappeared_system_event));
gpu_error_detected_event.Initialize("IApplicationFunctions:GpuErrorDetectedSystemEvent");
friend_invitation_storage_channel_event.Initialize(
"IApplicationFunctions:FriendInvitationStorageChannelEvent");
notification_storage_channel_event.Initialize(
"IApplicationFunctions:NotificationStorageChannelEvent");
health_warning_disappeared_system_event.Initialize(
"IApplicationFunctions:HealthWarningDisappearedSystemEvent");
}
@@ -1762,6 +1766,14 @@ void IApplicationFunctions::TryPopFromFriendInvitationStorageChannel(
rb.Push(ERR_NO_DATA_IN_CHANNEL);
}
void IApplicationFunctions::GetNotificationStorageChannelEvent(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");
IPC::ResponseBuilder rb{ctx, 2, 1};
rb.Push(ResultSuccess);
rb.PushCopyObjects(notification_storage_channel_event.GetReadableEvent());
}
void IApplicationFunctions::GetHealthWarningDisappearedSystemEvent(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");

View File

@@ -295,6 +295,7 @@ private:
void GetGpuErrorDetectedSystemEvent(Kernel::HLERequestContext& ctx);
void GetFriendInvitationStorageChannelEvent(Kernel::HLERequestContext& ctx);
void TryPopFromFriendInvitationStorageChannel(Kernel::HLERequestContext& ctx);
void GetNotificationStorageChannelEvent(Kernel::HLERequestContext& ctx);
void GetHealthWarningDisappearedSystemEvent(Kernel::HLERequestContext& ctx);
bool launch_popped_application_specific = false;
@@ -302,6 +303,7 @@ private:
s32 previous_program_index{-1};
Kernel::KEvent gpu_error_detected_event;
Kernel::KEvent friend_invitation_storage_channel_event;
Kernel::KEvent notification_storage_channel_event;
Kernel::KEvent health_warning_disappeared_system_event;
};

View File

@@ -16,6 +16,30 @@
namespace Service::AM::Applets {
struct ErrorCode {
u32 error_category{};
u32 error_number{};
static constexpr ErrorCode FromU64(u64 error_code) {
return {
.error_category{static_cast<u32>(error_code >> 32)},
.error_number{static_cast<u32>(error_code & 0xFFFFFFFF)},
};
}
static constexpr ErrorCode FromResultCode(ResultCode result) {
return {
.error_category{2000 + static_cast<u32>(result.module.Value())},
.error_number{result.description.Value()},
};
}
constexpr ResultCode ToResultCode() const {
return ResultCode{static_cast<ErrorModule>(error_category - 2000), error_number};
}
};
static_assert(sizeof(ErrorCode) == 0x8, "ErrorCode has incorrect size.");
#pragma pack(push, 4)
struct ShowError {
u8 mode;
@@ -76,12 +100,7 @@ void CopyArgumentData(const std::vector<u8>& data, T& variable) {
}
ResultCode Decode64BitError(u64 error) {
const auto description = (error >> 32) & 0x1FFF;
auto module = error & 0x3FF;
if (module >= 2000)
module -= 2000;
module &= 0x1FF;
return {static_cast<ErrorModule>(module), static_cast<u32>(description)};
return ErrorCode::FromU64(error).ToResultCode();
}
} // Anonymous namespace

View File

@@ -97,14 +97,24 @@ ResultCode VfsDirectoryServiceWrapper::DeleteFile(const std::string& path_) cons
ResultCode VfsDirectoryServiceWrapper::CreateDirectory(const std::string& path_) const {
std::string path(Common::FS::SanitizePath(path_));
auto dir = GetDirectoryRelativeWrapped(backing, Common::FS::GetParentPath(path));
if (dir == nullptr || Common::FS::GetFilename(Common::FS::GetParentPath(path)).empty()) {
dir = backing;
}
auto new_dir = dir->CreateSubdirectory(Common::FS::GetFilename(path));
if (new_dir == nullptr) {
// TODO(DarkLordZach): Find a better error code for this
return ResultUnknown;
// NOTE: This is inaccurate behavior. CreateDirectory is not recursive.
// CreateDirectory should return PathNotFound if the parent directory does not exist.
// This is here temporarily in order to have UMM "work" in the meantime.
// TODO (Morph): Remove this when a hardware test verifies the correct behavior.
const auto components = Common::FS::SplitPathComponents(path);
std::string relative_path;
for (const auto& component : components) {
// Skip empty path components
if (component.empty()) {
continue;
}
relative_path = Common::FS::SanitizePath(relative_path + '/' + component);
auto new_dir = backing->CreateSubdirectory(relative_path);
if (new_dir == nullptr) {
// TODO(DarkLordZach): Find a better error code for this
return ResultUnknown;
}
}
return ResultSuccess;
}

View File

@@ -15,6 +15,20 @@
namespace Service::HID {
class Controller_Touchscreen final : public ControllerBase {
public:
enum class TouchScreenModeForNx : u8 {
UseSystemSetting,
Finger,
Heat2,
};
struct TouchScreenConfigurationForNx {
TouchScreenModeForNx mode;
INSERT_PADDING_BYTES_NOINIT(0x7);
INSERT_PADDING_BYTES_NOINIT(0xF); // Reserved
};
static_assert(sizeof(TouchScreenConfigurationForNx) == 0x17,
"TouchScreenConfigurationForNx is an invalid size");
explicit Controller_Touchscreen(Core::System& system_);
~Controller_Touchscreen() override;

View File

@@ -331,7 +331,7 @@ Hid::Hid(Core::System& system_)
{529, nullptr, "SetDisallowedPalmaConnection"},
{1000, &Hid::SetNpadCommunicationMode, "SetNpadCommunicationMode"},
{1001, &Hid::GetNpadCommunicationMode, "GetNpadCommunicationMode"},
{1002, nullptr, "SetTouchScreenConfiguration"},
{1002, &Hid::SetTouchScreenConfiguration, "SetTouchScreenConfiguration"},
{1003, nullptr, "IsFirmwareUpdateNeededForNotification"},
{2000, nullptr, "ActivateDigitizer"},
};
@@ -1631,6 +1631,18 @@ void Hid::GetNpadCommunicationMode(Kernel::HLERequestContext& ctx) {
.GetNpadCommunicationMode());
}
void Hid::SetTouchScreenConfiguration(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto touchscreen_mode{rp.PopRaw<Controller_Touchscreen::TouchScreenConfigurationForNx>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_WARNING(Service_HID, "(STUBBED) called, touchscreen_mode={}, applet_resource_user_id={}",
touchscreen_mode.mode, applet_resource_user_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
}
class HidDbg final : public ServiceFramework<HidDbg> {
public:
explicit HidDbg(Core::System& system_) : ServiceFramework{system_, "hid:dbg"} {

View File

@@ -159,6 +159,7 @@ private:
void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
void SetNpadCommunicationMode(Kernel::HLERequestContext& ctx);
void GetNpadCommunicationMode(Kernel::HLERequestContext& ctx);
void SetTouchScreenConfiguration(Kernel::HLERequestContext& ctx);
enum class VibrationDeviceType : u32 {
Unknown = 0,

View File

@@ -0,0 +1,46 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included
#include "common/string_util.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/service/ngct/ngct.h"
#include "core/hle/service/service.h"
namespace Service::NGCT {
class IService final : public ServiceFramework<IService> {
public:
explicit IService(Core::System& system_) : ServiceFramework{system_, "ngct:u"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "Match"},
{1, &IService::Filter, "Filter"},
};
// clang-format on
RegisterHandlers(functions);
}
private:
void Filter(Kernel::HLERequestContext& ctx) {
const auto buffer = ctx.ReadBuffer();
const auto text = Common::StringFromFixedZeroTerminatedBuffer(
reinterpret_cast<const char*>(buffer.data()), buffer.size());
LOG_WARNING(Service_NGCT, "(STUBBED) called, text={}", text);
// Return the same string since we don't censor anything
ctx.WriteBuffer(buffer);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
}
};
void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) {
std::make_shared<IService>(system)->InstallAsService(system.ServiceManager());
}
} // namespace Service::NGCT

View File

@@ -0,0 +1,20 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included
#pragma once
namespace Core {
class System;
}
namespace Service::SM {
class ServiceManager;
}
namespace Service::NGCT {
/// Registers all NGCT services with the specified service manager.
void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system);
} // namespace Service::NGCT

View File

@@ -11,6 +11,7 @@
#include "core/hle/service/nifm/nifm.h"
#include "core/hle/service/service.h"
#include "core/network/network.h"
#include "core/network/network_interface.h"
namespace Service::NIFM {
@@ -179,10 +180,10 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
if (Settings::values.bcat_backend.GetValue() == "none") {
rb.PushEnum(RequestState::NotSubmitted);
} else {
if (Network::GetHostIPv4Address().has_value()) {
rb.PushEnum(RequestState::Connected);
} else {
rb.PushEnum(RequestState::NotSubmitted);
}
}
@@ -276,37 +277,45 @@ private:
void GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_NIFM, "(STUBBED) called");
const SfNetworkProfileData network_profile_data{
.ip_setting_data{
.ip_address_setting{
.is_automatic{true},
.current_address{192, 168, 1, 100},
.subnet_mask{255, 255, 255, 0},
.gateway{192, 168, 1, 1},
const auto net_iface = Network::GetSelectedNetworkInterface();
const SfNetworkProfileData network_profile_data = [&net_iface] {
if (!net_iface) {
return SfNetworkProfileData{};
}
return SfNetworkProfileData{
.ip_setting_data{
.ip_address_setting{
.is_automatic{true},
.current_address{Network::TranslateIPv4(net_iface->ip_address)},
.subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
.gateway{Network::TranslateIPv4(net_iface->gateway)},
},
.dns_setting{
.is_automatic{true},
.primary_dns{1, 1, 1, 1},
.secondary_dns{1, 0, 0, 1},
},
.proxy_setting{
.enabled{false},
.port{},
.proxy_server{},
.automatic_auth_enabled{},
.user{},
.password{},
},
.mtu{1500},
},
.dns_setting{
.is_automatic{true},
.primary_dns{1, 1, 1, 1},
.secondary_dns{1, 0, 0, 1},
.uuid{0xdeadbeef, 0xdeadbeef},
.network_name{"yuzu Network"},
.wireless_setting_data{
.ssid_length{12},
.ssid{"yuzu Network"},
.passphrase{"yuzupassword"},
},
.proxy_setting{
.enabled{false},
.port{},
.proxy_server{},
.automatic_auth_enabled{},
.user{},
.password{},
},
.mtu{1500},
},
.uuid{0xdeadbeef, 0xdeadbeef},
.network_name{"yuzu Network"},
.wireless_setting_data{
.ssid_length{12},
.ssid{"yuzu Network"},
.passphrase{"yuzupassword"},
},
};
};
}();
ctx.WriteBuffer(network_profile_data);
@@ -322,12 +331,15 @@ private:
void GetCurrentIpAddress(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_NIFM, "(STUBBED) called");
const auto [ipv4, error] = Network::GetHostIPv4Address();
UNIMPLEMENTED_IF(error != Network::Errno::SUCCESS);
auto ipv4 = Network::GetHostIPv4Address();
if (!ipv4) {
LOG_ERROR(Service_NIFM, "Couldn't get host IPv4 address, defaulting to 0.0.0.0");
ipv4.emplace(Network::IPv4Address{0, 0, 0, 0});
}
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
rb.PushRaw(ipv4);
rb.PushRaw(*ipv4);
}
void CreateTemporaryNetworkProfile(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_NIFM, "called");
@@ -348,25 +360,33 @@ private:
LOG_WARNING(Service_NIFM, "(STUBBED) called");
struct IpConfigInfo {
IpAddressSetting ip_address_setting;
DnsSetting dns_setting;
IpAddressSetting ip_address_setting{};
DnsSetting dns_setting{};
};
static_assert(sizeof(IpConfigInfo) == sizeof(IpAddressSetting) + sizeof(DnsSetting),
"IpConfigInfo has incorrect size.");
const IpConfigInfo ip_config_info{
.ip_address_setting{
.is_automatic{true},
.current_address{192, 168, 1, 100},
.subnet_mask{255, 255, 255, 0},
.gateway{192, 168, 1, 1},
},
.dns_setting{
.is_automatic{true},
.primary_dns{1, 1, 1, 1},
.secondary_dns{1, 0, 0, 1},
},
};
const auto net_iface = Network::GetSelectedNetworkInterface();
const IpConfigInfo ip_config_info = [&net_iface] {
if (!net_iface) {
return IpConfigInfo{};
}
return IpConfigInfo{
.ip_address_setting{
.is_automatic{true},
.current_address{Network::TranslateIPv4(net_iface->ip_address)},
.subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
.gateway{Network::TranslateIPv4(net_iface->gateway)},
},
.dns_setting{
.is_automatic{true},
.primary_dns{1, 1, 1, 1},
.secondary_dns{1, 0, 0, 1},
},
};
}();
IPC::ResponseBuilder rb{ctx, 2 + (sizeof(IpConfigInfo) + 3) / sizeof(u32)};
rb.Push(ResultSuccess);
@@ -384,10 +404,10 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
if (Settings::values.bcat_backend.GetValue() == "none") {
rb.Push<u8>(0);
} else {
if (Network::GetHostIPv4Address().has_value()) {
rb.Push<u8>(1);
} else {
rb.Push<u8>(0);
}
}
void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) {
@@ -395,10 +415,10 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
if (Settings::values.bcat_backend.GetValue() == "none") {
rb.Push<u8>(0);
} else {
if (Network::GetHostIPv4Address().has_value()) {
rb.Push<u8>(1);
} else {
rb.Push<u8>(0);
}
}
};

View File

@@ -42,15 +42,14 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect) {
VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
const VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
LOG_TRACE(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
addr, offset, width, height, stride, format);
using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
const Tegra::FramebufferConfig framebuffer{
addr, offset, width, height, stride, static_cast<PixelFormat>(format),
transform, crop_rect};
const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format);
const Tegra::FramebufferConfig framebuffer{addr, offset, width, height,
stride, pixel_format, transform, crop_rect};
system.GetPerfStats().EndSystemFrame();
system.GPU().SwapBuffers(&framebuffer);

View File

@@ -9,17 +9,20 @@
#include "core/core.h"
#include "core/hle/kernel/k_writable_event.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
namespace Service::NVFlinger {
BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_)
: id(id_), layer_id(layer_id_), buffer_wait_event{kernel} {
Kernel::KAutoObject::Create(std::addressof(buffer_wait_event));
buffer_wait_event.Initialize("BufferQueue:WaitEvent");
BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_,
KernelHelpers::ServiceContext& service_context_)
: id(id_), layer_id(layer_id_), service_context{service_context_} {
buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent");
}
BufferQueue::~BufferQueue() = default;
BufferQueue::~BufferQueue() {
service_context.CloseEvent(buffer_wait_event);
}
void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) {
ASSERT(slot < buffer_slots);
@@ -41,7 +44,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
.multi_fence = {},
};
buffer_wait_event.GetWritableEvent().Signal();
buffer_wait_event->GetWritableEvent().Signal();
}
std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
@@ -119,7 +122,7 @@ void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& mult
}
free_buffers_condition.notify_one();
buffer_wait_event.GetWritableEvent().Signal();
buffer_wait_event->GetWritableEvent().Signal();
}
std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
@@ -154,7 +157,7 @@ void BufferQueue::ReleaseBuffer(u32 slot) {
}
free_buffers_condition.notify_one();
buffer_wait_event.GetWritableEvent().Signal();
buffer_wait_event->GetWritableEvent().Signal();
}
void BufferQueue::Connect() {
@@ -169,7 +172,7 @@ void BufferQueue::Disconnect() {
std::unique_lock lock{queue_sequence_mutex};
queue_sequence.clear();
}
buffer_wait_event.GetWritableEvent().Signal();
buffer_wait_event->GetWritableEvent().Signal();
is_connect = false;
free_buffers_condition.notify_one();
}
@@ -189,11 +192,11 @@ u32 BufferQueue::Query(QueryType type) {
}
Kernel::KWritableEvent& BufferQueue::GetWritableBufferWaitEvent() {
return buffer_wait_event.GetWritableEvent();
return buffer_wait_event->GetWritableEvent();
}
Kernel::KReadableEvent& BufferQueue::GetBufferWaitEvent() {
return buffer_wait_event.GetReadableEvent();
return buffer_wait_event->GetReadableEvent();
}
} // namespace Service::NVFlinger

View File

@@ -24,6 +24,10 @@ class KReadableEvent;
class KWritableEvent;
} // namespace Kernel
namespace Service::KernelHelpers {
class ServiceContext;
} // namespace Service::KernelHelpers
namespace Service::NVFlinger {
constexpr u32 buffer_slots = 0x40;
@@ -38,7 +42,9 @@ struct IGBPBuffer {
u32_le index;
INSERT_PADDING_WORDS(3);
u32_le gpu_buffer_id;
INSERT_PADDING_WORDS(17);
INSERT_PADDING_WORDS(6);
u32_le external_format;
INSERT_PADDING_WORDS(10);
u32_le nvmap_handle;
u32_le offset;
INSERT_PADDING_WORDS(60);
@@ -54,7 +60,8 @@ public:
NativeWindowFormat = 2,
};
explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_);
explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_,
KernelHelpers::ServiceContext& service_context_);
~BufferQueue();
enum class BufferTransformFlags : u32 {
@@ -130,12 +137,14 @@ private:
std::list<u32> free_buffers;
std::array<Buffer, buffer_slots> buffers;
std::list<u32> queue_sequence;
Kernel::KEvent buffer_wait_event;
Kernel::KEvent* buffer_wait_event{};
std::mutex free_buffers_mutex;
std::condition_variable free_buffers_condition;
std::mutex queue_sequence_mutex;
KernelHelpers::ServiceContext& service_context;
};
} // namespace Service::NVFlinger

View File

@@ -61,12 +61,13 @@ void NVFlinger::SplitVSync() {
}
}
NVFlinger::NVFlinger(Core::System& system_) : system(system_) {
displays.emplace_back(0, "Default", system);
displays.emplace_back(1, "External", system);
displays.emplace_back(2, "Edid", system);
displays.emplace_back(3, "Internal", system);
displays.emplace_back(4, "Null", system);
NVFlinger::NVFlinger(Core::System& system_)
: system(system_), service_context(system_, "nvflinger") {
displays.emplace_back(0, "Default", service_context, system);
displays.emplace_back(1, "External", service_context, system);
displays.emplace_back(2, "Edid", service_context, system);
displays.emplace_back(3, "Internal", service_context, system);
displays.emplace_back(4, "Null", service_context, system);
guard = std::make_shared<std::mutex>();
// Schedule the screen composition events
@@ -146,7 +147,7 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
void NVFlinger::CreateLayerAtId(VI::Display& display, u64 layer_id) {
const u32 buffer_queue_id = next_buffer_queue_id++;
buffer_queues.emplace_back(
std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id));
std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id, service_context));
display.CreateLayer(layer_id, *buffer_queues.back());
}
@@ -297,7 +298,7 @@ void NVFlinger::Compose() {
auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");
ASSERT(nvdisp);
nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format,
nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.external_format,
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
buffer->get().transform, buffer->get().crop_rect);

View File

@@ -15,6 +15,7 @@
#include <vector>
#include "common/common_types.h"
#include "core/hle/service/kernel_helpers.h"
namespace Common {
class Event;
@@ -135,6 +136,8 @@ private:
std::unique_ptr<std::thread> vsync_thread;
std::unique_ptr<Common::Event> wait_event;
std::atomic<bool> is_running{};
KernelHelpers::ServiceContext service_context;
};
} // namespace Service::NVFlinger

View File

@@ -46,6 +46,7 @@
#include "core/hle/service/ncm/ncm.h"
#include "core/hle/service/nfc/nfc.h"
#include "core/hle/service/nfp/nfp.h"
#include "core/hle/service/ngct/ngct.h"
#include "core/hle/service/nifm/nifm.h"
#include "core/hle/service/nim/nim.h"
#include "core/hle/service/npns/npns.h"
@@ -271,6 +272,7 @@ Services::Services(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system
NCM::InstallInterfaces(*sm, system);
NFC::InstallInterfaces(*sm, system);
NFP::InstallInterfaces(*sm, system);
NGCT::InstallInterfaces(*sm, system);
NIFM::InstallInterfaces(*sm, system);
NIM::InstallInterfaces(*sm, system);
NPNS::InstallInterfaces(*sm, system);

View File

@@ -12,18 +12,21 @@
#include "core/hle/kernel/k_event.h"
#include "core/hle/kernel/k_readable_event.h"
#include "core/hle/kernel/k_writable_event.h"
#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/vi/display/vi_display.h"
#include "core/hle/service/vi/layer/vi_layer.h"
namespace Service::VI {
Display::Display(u64 id, std::string name_, Core::System& system)
: display_id{id}, name{std::move(name_)}, vsync_event{system.Kernel()} {
Kernel::KAutoObject::Create(std::addressof(vsync_event));
vsync_event.Initialize(fmt::format("Display VSync Event {}", id));
Display::Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_,
Core::System& system_)
: display_id{id}, name{std::move(name_)}, service_context{service_context_} {
vsync_event = service_context.CreateEvent(fmt::format("Display VSync Event {}", id));
}
Display::~Display() = default;
Display::~Display() {
service_context.CloseEvent(vsync_event);
}
Layer& Display::GetLayer(std::size_t index) {
return *layers.at(index);
@@ -34,11 +37,11 @@ const Layer& Display::GetLayer(std::size_t index) const {
}
Kernel::KReadableEvent& Display::GetVSyncEvent() {
return vsync_event.GetReadableEvent();
return vsync_event->GetReadableEvent();
}
void Display::SignalVSyncEvent() {
vsync_event.GetWritableEvent().Signal();
vsync_event->GetWritableEvent().Signal();
}
void Display::CreateLayer(u64 layer_id, NVFlinger::BufferQueue& buffer_queue) {

View File

@@ -18,6 +18,9 @@ class KEvent;
namespace Service::NVFlinger {
class BufferQueue;
}
namespace Service::KernelHelpers {
class ServiceContext;
} // namespace Service::KernelHelpers
namespace Service::VI {
@@ -31,10 +34,13 @@ class Display {
public:
/// Constructs a display with a given unique ID and name.
///
/// @param id The unique ID for this display.
/// @param id The unique ID for this display.
/// @param service_context_ The ServiceContext for the owning service.
/// @param name_ The name for this display.
/// @param system_ The global system instance.
///
Display(u64 id, std::string name_, Core::System& system);
Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_,
Core::System& system_);
~Display();
/// Gets the unique ID assigned to this display.
@@ -98,9 +104,10 @@ public:
private:
u64 display_id;
std::string name;
KernelHelpers::ServiceContext& service_context;
std::vector<std::shared_ptr<Layer>> layers;
Kernel::KEvent vsync_event;
Kernel::KEvent* vsync_event{};
};
} // namespace Service::VI

View File

@@ -1158,7 +1158,7 @@ private:
const auto layer_id = nv_flinger.CreateLayer(display_id);
if (!layer_id) {
LOG_ERROR(Service_VI, "Layer not found! layer_id={}", *layer_id);
LOG_ERROR(Service_VI, "Layer not found! display_id={}", display_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_NOT_FOUND);
return;

View File

@@ -7,12 +7,14 @@
#include <limits>
#include <utility>
#include <vector>
#include "common/common_funcs.h"
#include "common/error.h"
#ifdef _WIN32
#define _WINSOCK_DEPRECATED_NO_WARNINGS // gethostname
#include <winsock2.h>
#include <ws2tcpip.h>
#elif YUZU_UNIX
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
@@ -27,7 +29,9 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "core/network/network.h"
#include "core/network/network_interface.h"
#include "core/network/sockets.h"
namespace Network {
@@ -47,11 +51,6 @@ void Finalize() {
WSACleanup();
}
constexpr IPv4Address TranslateIPv4(in_addr addr) {
auto& bytes = addr.S_un.S_un_b;
return IPv4Address{bytes.s_b1, bytes.s_b2, bytes.s_b3, bytes.s_b4};
}
sockaddr TranslateFromSockAddrIn(SockAddrIn input) {
sockaddr_in result;
@@ -138,12 +137,6 @@ void Initialize() {}
void Finalize() {}
constexpr IPv4Address TranslateIPv4(in_addr addr) {
const u32 bytes = addr.s_addr;
return IPv4Address{static_cast<u8>(bytes), static_cast<u8>(bytes >> 8),
static_cast<u8>(bytes >> 16), static_cast<u8>(bytes >> 24)};
}
sockaddr TranslateFromSockAddrIn(SockAddrIn input) {
sockaddr_in result;
@@ -182,7 +175,7 @@ linger MakeLinger(bool enable, u32 linger_value) {
}
bool EnableNonBlock(int fd, bool enable) {
int flags = fcntl(fd, F_GETFD);
int flags = fcntl(fd, F_GETFL);
if (flags == -1) {
return false;
}
@@ -191,7 +184,7 @@ bool EnableNonBlock(int fd, bool enable) {
} else {
flags &= ~O_NONBLOCK;
}
return fcntl(fd, F_SETFD, flags) == 0;
return fcntl(fd, F_SETFL, flags) == 0;
}
Errno TranslateNativeError(int e) {
@@ -227,8 +220,12 @@ Errno GetAndLogLastError() {
#else
int e = errno;
#endif
LOG_ERROR(Network, "Socket operation error: {}", NativeErrorToString(e));
return TranslateNativeError(e);
const Errno err = TranslateNativeError(e);
if (err == Errno::AGAIN) {
return err;
}
LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e));
return err;
}
int TranslateDomain(Domain domain) {
@@ -353,27 +350,29 @@ NetworkInstance::~NetworkInstance() {
Finalize();
}
std::pair<IPv4Address, Errno> GetHostIPv4Address() {
std::array<char, 256> name{};
if (gethostname(name.data(), static_cast<int>(name.size()) - 1) == SOCKET_ERROR) {
return {IPv4Address{}, GetAndLogLastError()};
std::optional<IPv4Address> GetHostIPv4Address() {
const std::string& selected_network_interface = Settings::values.network_interface.GetValue();
const auto network_interfaces = Network::GetAvailableNetworkInterfaces();
if (network_interfaces.size() == 0) {
LOG_ERROR(Network, "GetAvailableNetworkInterfaces returned no interfaces");
return {};
}
hostent* const ent = gethostbyname(name.data());
if (!ent) {
return {IPv4Address{}, GetAndLogLastError()};
}
if (ent->h_addr_list == nullptr) {
UNIMPLEMENTED_MSG("No addr provided in hostent->h_addr_list");
return {IPv4Address{}, Errno::SUCCESS};
}
if (ent->h_length != sizeof(in_addr)) {
UNIMPLEMENTED_MSG("Unexpected size={} in hostent->h_length", ent->h_length);
}
const auto res =
std::ranges::find_if(network_interfaces, [&selected_network_interface](const auto& iface) {
return iface.name == selected_network_interface;
});
in_addr addr;
std::memcpy(&addr, ent->h_addr_list[0], sizeof(addr));
return {TranslateIPv4(addr), Errno::SUCCESS};
if (res != network_interfaces.end()) {
char ip_addr[16] = {};
ASSERT(inet_ntop(AF_INET, &res->ip_address, ip_addr, sizeof(ip_addr)) != nullptr);
LOG_INFO(Network, "IP address: {}", ip_addr);
return TranslateIPv4(res->ip_address);
} else {
LOG_ERROR(Network, "Couldn't find selected interface \"{}\"", selected_network_interface);
return {};
}
}
std::pair<s32, Errno> Poll(std::vector<PollFD>& pollfds, s32 timeout) {

View File

@@ -5,11 +5,18 @@
#pragma once
#include <array>
#include <optional>
#include <utility>
#include "common/common_funcs.h"
#include "common/common_types.h"
#ifdef _WIN32
#include <winsock2.h>
#elif YUZU_UNIX
#include <netinet/in.h>
#endif
namespace Network {
class Socket;
@@ -92,8 +99,21 @@ public:
~NetworkInstance();
};
#ifdef _WIN32
constexpr IPv4Address TranslateIPv4(in_addr addr) {
auto& bytes = addr.S_un.S_un_b;
return IPv4Address{bytes.s_b1, bytes.s_b2, bytes.s_b3, bytes.s_b4};
}
#elif YUZU_UNIX
constexpr IPv4Address TranslateIPv4(in_addr addr) {
const u32 bytes = addr.s_addr;
return IPv4Address{static_cast<u8>(bytes), static_cast<u8>(bytes >> 8),
static_cast<u8>(bytes >> 16), static_cast<u8>(bytes >> 24)};
}
#endif
/// @brief Returns host's IPv4 address
/// @return Pair of an array of human ordered IPv4 address (e.g. 192.168.0.1) and an error code
std::pair<IPv4Address, Errno> GetHostIPv4Address();
/// @return human ordered IPv4 address (e.g. 192.168.0.1) as an array
std::optional<IPv4Address> GetHostIPv4Address();
} // namespace Network

View File

@@ -0,0 +1,210 @@
// Copyright 2021 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <fstream>
#include <sstream>
#include <vector>
#include "common/bit_cast.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "common/string_util.h"
#include "core/network/network_interface.h"
#ifdef _WIN32
#include <iphlpapi.h>
#else
#include <cerrno>
#include <ifaddrs.h>
#include <net/if.h>
#endif
namespace Network {
#ifdef _WIN32
std::vector<NetworkInterface> GetAvailableNetworkInterfaces() {
std::vector<IP_ADAPTER_ADDRESSES> adapter_addresses;
DWORD ret = ERROR_BUFFER_OVERFLOW;
DWORD buf_size = 0;
// retry up to 5 times
for (int i = 0; i < 5 && ret == ERROR_BUFFER_OVERFLOW; i++) {
ret = GetAdaptersAddresses(
AF_INET, GAA_FLAG_SKIP_MULTICAST | GAA_FLAG_SKIP_DNS_SERVER | GAA_FLAG_INCLUDE_GATEWAYS,
nullptr, adapter_addresses.data(), &buf_size);
if (ret != ERROR_BUFFER_OVERFLOW) {
break;
}
adapter_addresses.resize((buf_size / sizeof(IP_ADAPTER_ADDRESSES)) + 1);
}
if (ret != NO_ERROR) {
LOG_ERROR(Network, "Failed to get network interfaces with GetAdaptersAddresses");
return {};
}
std::vector<NetworkInterface> result;
for (auto current_address = adapter_addresses.data(); current_address != nullptr;
current_address = current_address->Next) {
if (current_address->FirstUnicastAddress == nullptr ||
current_address->FirstUnicastAddress->Address.lpSockaddr == nullptr) {
continue;
}
if (current_address->OperStatus != IfOperStatusUp) {
continue;
}
const auto ip_addr = Common::BitCast<struct sockaddr_in>(
*current_address->FirstUnicastAddress->Address.lpSockaddr)
.sin_addr;
ULONG mask = 0;
if (ConvertLengthToIpv4Mask(current_address->FirstUnicastAddress->OnLinkPrefixLength,
&mask) != NO_ERROR) {
LOG_ERROR(Network, "Failed to convert IPv4 prefix length to subnet mask");
continue;
}
struct in_addr gateway = {.S_un{.S_addr{0}}};
if (current_address->FirstGatewayAddress != nullptr &&
current_address->FirstGatewayAddress->Address.lpSockaddr != nullptr) {
gateway = Common::BitCast<struct sockaddr_in>(
*current_address->FirstGatewayAddress->Address.lpSockaddr)
.sin_addr;
}
result.emplace_back(NetworkInterface{
.name{Common::UTF16ToUTF8(std::wstring{current_address->FriendlyName})},
.ip_address{ip_addr},
.subnet_mask = in_addr{.S_un{.S_addr{mask}}},
.gateway = gateway});
}
return result;
}
#else
std::vector<NetworkInterface> GetAvailableNetworkInterfaces() {
struct ifaddrs* ifaddr = nullptr;
if (getifaddrs(&ifaddr) != 0) {
LOG_ERROR(Network, "Failed to get network interfaces with getifaddrs: {}",
std::strerror(errno));
return {};
}
std::vector<NetworkInterface> result;
for (auto ifa = ifaddr; ifa != nullptr; ifa = ifa->ifa_next) {
if (ifa->ifa_addr == nullptr || ifa->ifa_netmask == nullptr) {
continue;
}
if (ifa->ifa_addr->sa_family != AF_INET) {
continue;
}
if ((ifa->ifa_flags & IFF_UP) == 0 || (ifa->ifa_flags & IFF_LOOPBACK) != 0) {
continue;
}
u32 gateway{};
std::ifstream file{"/proc/net/route"};
if (!file.is_open()) {
LOG_ERROR(Network, "Failed to open \"/proc/net/route\"");
result.emplace_back(NetworkInterface{
.name{ifa->ifa_name},
.ip_address{Common::BitCast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr},
.subnet_mask{Common::BitCast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr},
.gateway{in_addr{.s_addr = gateway}}});
continue;
}
// ignore header
file.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
bool gateway_found = false;
for (std::string line; std::getline(file, line);) {
std::istringstream iss{line};
std::string iface_name;
iss >> iface_name;
if (iface_name != ifa->ifa_name) {
continue;
}
iss >> std::hex;
u32 dest{};
iss >> dest;
if (dest != 0) {
// not the default route
continue;
}
iss >> gateway;
u16 flags{};
iss >> flags;
// flag RTF_GATEWAY (defined in <linux/route.h>)
if ((flags & 0x2) == 0) {
continue;
}
gateway_found = true;
break;
}
if (!gateway_found) {
gateway = 0;
}
result.emplace_back(NetworkInterface{
.name{ifa->ifa_name},
.ip_address{Common::BitCast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr},
.subnet_mask{Common::BitCast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr},
.gateway{in_addr{.s_addr = gateway}}});
}
freeifaddrs(ifaddr);
return result;
}
#endif
std::optional<NetworkInterface> GetSelectedNetworkInterface() {
const auto& selected_network_interface = Settings::values.network_interface.GetValue();
const auto network_interfaces = Network::GetAvailableNetworkInterfaces();
if (network_interfaces.size() == 0) {
LOG_ERROR(Network, "GetAvailableNetworkInterfaces returned no interfaces");
return std::nullopt;
}
const auto res =
std::ranges::find_if(network_interfaces, [&selected_network_interface](const auto& iface) {
return iface.name == selected_network_interface;
});
if (res == network_interfaces.end()) {
LOG_ERROR(Network, "Couldn't find selected interface \"{}\"", selected_network_interface);
return std::nullopt;
}
return *res;
}
} // namespace Network

View File

@@ -0,0 +1,29 @@
// Copyright 2021 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <string>
#include <vector>
#ifdef _WIN32
#include <winsock2.h>
#else
#include <netinet/in.h>
#endif
namespace Network {
struct NetworkInterface {
std::string name;
struct in_addr ip_address;
struct in_addr subnet_mask;
struct in_addr gateway;
};
std::vector<NetworkInterface> GetAvailableNetworkInterfaces();
std::optional<NetworkInterface> GetSelectedNetworkInterface();
} // namespace Network

View File

@@ -72,6 +72,18 @@ static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
return "Unknown";
}
static const char* TranslateNvdecEmulation(Settings::NvdecEmulation backend) {
switch (backend) {
case Settings::NvdecEmulation::Off:
return "Off";
case Settings::NvdecEmulation::CPU:
return "CPU";
case Settings::NvdecEmulation::GPU:
return "GPU";
}
return "Unknown";
}
u64 GetTelemetryId() {
u64 telemetry_id{};
const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id";
@@ -229,8 +241,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue()));
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation.GetValue());
AddField(field_type, "Renderer_UseNvdecEmulation",
Settings::values.use_nvdec_emulation.GetValue());
AddField(field_type, "Renderer_NvdecEmulation",
TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue()));
AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue());
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue());
AddField(field_type, "Renderer_ShaderBackend",

View File

@@ -889,6 +889,9 @@ SDLState::SDLState() {
RegisterFactory<VibrationDevice>("sdl", vibration_factory);
RegisterFactory<MotionDevice>("sdl", motion_factory);
// Disable raw input. When enabled this setting causes SDL to die when a web applet opens
SDL_SetHint(SDL_HINT_JOYSTICK_RAWINPUT, "0");
// Enable HIDAPI rumble. This prevents SDL from disabling motion on PS4 and PS5 controllers
SDL_SetHint(SDL_HINT_JOYSTICK_HIDAPI_PS4_RUMBLE, "1");
SDL_SetHint(SDL_HINT_JOYSTICK_HIDAPI_PS5_RUMBLE, "1");

View File

@@ -11,6 +11,8 @@
namespace Shader::Backend::GLSL {
namespace {
constexpr char THREAD_ID[]{"gl_SubGroupInvocationARB"};
void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) {
IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
if (!in_bounds) {
@@ -43,84 +45,100 @@ void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op,
ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width);
SetInBoundsFlag(ctx, inst);
}
std::string_view BallotIndex(EmitContext& ctx) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ".x";
}
return "[gl_SubGroupInvocationARB>>5]";
}
std::string GetMask(EmitContext& ctx, std::string_view mask) {
const auto ballot_index{BallotIndex(ctx)};
return fmt::format("uint(uvec2({}){})", mask, ballot_index);
}
} // Anonymous namespace
void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst);
ctx.AddU32("{}={}&31u;", inst, THREAD_ID);
}
void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
} else {
const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
return;
}
const auto ballot_index{BallotIndex(ctx)};
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
}
void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
} else {
const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
return;
}
const auto ballot_index{BallotIndex(ctx)};
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
}
void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
} else {
const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
const auto value{fmt::format("({}^{})", ballot, active_mask)};
ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
return;
}
const auto ballot_index{BallotIndex(ctx)};
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
const auto value{fmt::format("({}^{})", ballot, active_mask)};
ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
}
void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
} else {
ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred);
}
const auto ballot_index{BallotIndex(ctx)};
ctx.AddU32("{}=uvec2(ballotARB({})){};", inst, pred, ballot_index);
}
void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupEqMaskARB"));
}
void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLtMaskARB"));
}
void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLeMaskARB"));
}
void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGtMaskARB"));
}
void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGeMaskARB"));
}
void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
std::string_view index, std::string_view clamp,
std::string_view segmentation_mask) {
std::string_view index, std::string_view clamp, std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask);
UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, seg_mask);
return;
}
const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
const auto thread_id{"gl_SubGroupInvocationARB"};
const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)};
const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
const auto upper_index{fmt::format("{}?{}+32:{}", is_upper_partition, index, index)};
const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
const auto lhs{fmt::format("({}&{})", index, not_seg_mask)};
const auto not_seg_mask{fmt::format("(~{})", seg_mask)};
const auto min_thread_id{ComputeMinThreadId(THREAD_ID, seg_mask)};
const auto max_thread_id{
ComputeMaxThreadId(min_thread_id, big_warp ? upper_clamp : clamp, not_seg_mask)};
const auto lhs{fmt::format("({}&{})", big_warp ? upper_index : index, not_seg_mask)};
const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
@@ -128,29 +146,34 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
}
void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
std::string_view clamp, std::string_view segmentation_mask) {
std::string_view clamp, std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask);
UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, seg_mask);
return;
}
const auto thread_id{"gl_SubGroupInvocationARB"};
const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
const auto src_thread_id{fmt::format("({}-{})", thread_id, index)};
const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
const auto src_thread_id{fmt::format("({}-{})", THREAD_ID, index)};
ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
}
void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
std::string_view index, std::string_view clamp,
std::string_view segmentation_mask) {
std::string_view index, std::string_view clamp, std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask);
UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, seg_mask);
return;
}
const auto thread_id{"gl_SubGroupInvocationARB"};
const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
const auto src_thread_id{fmt::format("({}+{})", thread_id, index)};
const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
const auto src_thread_id{fmt::format("({}+{})", THREAD_ID, index)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
@@ -158,14 +181,17 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
std::string_view index, std::string_view clamp,
std::string_view segmentation_mask) {
std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask);
UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, seg_mask);
return;
}
const auto thread_id{"gl_SubGroupInvocationARB"};
const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
const auto src_thread_id{fmt::format("({}^{})", THREAD_ID, index)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);

View File

@@ -15,6 +15,8 @@
namespace Shader::Backend::SPIRV {
namespace {
constexpr size_t NUM_FIXEDFNCTEXTURE = 10;
enum class Operation {
Increment,
Decrement,
@@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) {
return pointer_type;
}
}
size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations,
size_t start_offset) {
for (size_t location = start_offset; location < used_locations.size(); ++location) {
if (!used_locations.test(location)) {
return location;
}
}
throw RuntimeError("Unable to get an unused location for legacy attribute");
}
} // Anonymous namespace
void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
@@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
loads[IR::Attribute::TessellationEvaluationPointV]) {
tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord);
}
std::bitset<IR::NUM_GENERICS> used_locations{};
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
const AttributeType input_type{runtime_info.generic_input_types[index]};
if (!runtime_info.previous_stage_stores.Generic(index)) {
@@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
if (input_type == AttributeType::Disabled) {
continue;
}
used_locations.set(index);
const Id type{GetAttributeType(*this, input_type)};
const Id id{DefineInput(*this, type, true)};
Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
@@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) {
break;
}
}
size_t previous_unused_location = 0;
if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
previous_unused_location = location;
used_locations.set(location);
const Id id{DefineInput(*this, F32[4], true)};
Decorate(id, spv::Decoration::Location, location);
input_front_color = id;
}
for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
const size_t location =
FindNextUnusedLocation(used_locations, previous_unused_location);
previous_unused_location = location;
used_locations.set(location);
const Id id{DefineInput(*this, F32[4], true)};
Decorate(id, spv::Decoration::Location, location);
input_fixed_fnc_textures[index] = id;
}
}
if (stage == Stage::TessellationEval) {
for (size_t index = 0; index < info.uses_patches.size(); ++index) {
if (!info.uses_patches[index]) {
@@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt,
spv::BuiltIn::ViewportMaskNV);
}
std::bitset<IR::NUM_GENERICS> used_locations{};
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
if (info.stores.Generic(index)) {
DefineGenericOutput(*this, index, invocations);
used_locations.set(index);
}
}
size_t previous_unused_location = 0;
if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
previous_unused_location = location;
used_locations.set(location);
const Id id{DefineOutput(*this, F32[4], invocations)};
Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
output_front_color = id;
}
for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
const size_t location =
FindNextUnusedLocation(used_locations, previous_unused_location);
previous_unused_location = location;
used_locations.set(location);
const Id id{DefineOutput(*this, F32[4], invocations)};
Decorate(id, spv::Decoration::Location, location);
output_fixed_fnc_textures[index] = id;
}
}
switch (stage) {

View File

@@ -268,10 +268,14 @@ public:
Id write_global_func_u32x4{};
Id input_position{};
Id input_front_color{};
std::array<Id, 10> input_fixed_fnc_textures{};
std::array<Id, 32> input_generics{};
Id output_point_size{};
Id output_position{};
Id output_front_color{};
std::array<Id, 10> output_fixed_fnc_textures{};
std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
Id output_tess_level_outer{};

View File

@@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&...
}
}
bool IsFixedFncTexture(IR::Attribute attribute) {
return attribute >= IR::Attribute::FixedFncTexture0S &&
attribute <= IR::Attribute::FixedFncTexture9Q;
}
u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) {
if (!IsFixedFncTexture(attribute)) {
throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
}
return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u;
}
u32 FixedFncTextureAttributeElement(IR::Attribute attribute) {
if (!IsFixedFncTexture(attribute)) {
throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
}
return static_cast<u32>(attribute) % 4u;
}
template <typename... Args>
Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) {
if (ctx.stage == Stage::TessellationControl) {
@@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);
}
}
if (IsFixedFncTexture(attr)) {
const u32 index{FixedFncTextureAttributeIndex(attr)};
const u32 element{FixedFncTextureAttributeElement(attr)};
const Id element_id{ctx.Const(element)};
return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index],
element_id);
}
switch (attr) {
case IR::Attribute::PointSize:
return ctx.output_point_size;
@@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
const Id element_id{ctx.Const(element)};
return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);
}
case IR::Attribute::ColorFrontDiffuseR:
case IR::Attribute::ColorFrontDiffuseG:
case IR::Attribute::ColorFrontDiffuseB:
case IR::Attribute::ColorFrontDiffuseA: {
const u32 element{static_cast<u32>(attr) % 4};
const Id element_id{ctx.Const(element)};
return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id);
}
case IR::Attribute::ClipDistance0:
case IR::Attribute::ClipDistance1:
case IR::Attribute::ClipDistance2:
@@ -298,19 +332,21 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
if (IR::IsGeneric(attr)) {
const u32 index{IR::GenericAttributeIndex(attr)};
const std::optional<AttrInfo> type{AttrTypes(ctx, index)};
if (!type) {
// Attribute is disabled
if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
// Attribute is disabled or varying component is not written
return ctx.Const(element == 3 ? 1.0f : 0.0f);
}
if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
// Varying component is not written
return ctx.Const(type && element == 3 ? 1.0f : 0.0f);
}
const Id generic_id{ctx.input_generics.at(index)};
const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))};
const Id value{ctx.OpLoad(type->id, pointer)};
return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
}
if (IsFixedFncTexture(attr)) {
const u32 index{FixedFncTextureAttributeIndex(attr)};
const Id attr_id{ctx.input_fixed_fnc_textures[index]};
const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))};
return ctx.OpLoad(ctx.F32[1], attr_ptr);
}
switch (attr) {
case IR::Attribute::PrimitiveId:
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id));
@@ -320,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
case IR::Attribute::PositionW:
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
ctx.Const(element)));
case IR::Attribute::ColorFrontDiffuseR:
case IR::Attribute::ColorFrontDiffuseG:
case IR::Attribute::ColorFrontDiffuseB:
case IR::Attribute::ColorFrontDiffuseA: {
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color,
ctx.Const(element)));
}
case IR::Attribute::InstanceId:
if (ctx.profile.support_vertex_instance_id) {
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
@@ -337,8 +380,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
}
case IR::Attribute::FrontFace:
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value);
return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
ctx.f32_zero_value);
case IR::Attribute::PointSpriteS:
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value));

View File

@@ -7,8 +7,13 @@
namespace Shader::Backend::SPIRV {
namespace {
Id GetThreadId(EmitContext& ctx) {
return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id);
}
Id WarpExtract(EmitContext& ctx, Id value) {
const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))};
return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
}
@@ -48,10 +53,17 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
return ctx.OpSelect(ctx.U32[1], in_range,
ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
}
Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
const Id thirty_two{ctx.Const(32u)};
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
}
} // Anonymous namespace
Id EmitLaneId(EmitContext& ctx) {
const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id id{GetThreadId(ctx)};
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return id;
}
@@ -123,7 +135,15 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
const Id thirty_two{ctx.Const(32u)};
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
}
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
@@ -137,7 +157,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -148,7 +171,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -159,7 +185,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};

View File

@@ -20,6 +20,7 @@
#include "shader_recompiler/frontend/maxwell/decode.h"
#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
#include "shader_recompiler/frontend/maxwell/translate/translate.h"
#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::Maxwell {
@@ -652,7 +653,7 @@ class TranslatePass {
public:
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
IR::AbstractSyntaxList& syntax_list_)
IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
syntax_list{syntax_list_} {
Visit(root_stmt, nullptr, nullptr);
@@ -660,6 +661,9 @@ public:
IR::Block& first_block{*syntax_list.front().data.block};
IR::IREmitter ir(first_block, first_block.begin());
ir.Prologue();
if (uses_demote_to_helper && host_info.needs_demote_reorder) {
DemoteCombinationPass();
}
}
private:
@@ -809,7 +813,14 @@ private:
}
case StatementType::Return: {
ensure_block();
IR::IREmitter{*current_block}.Epilogue();
IR::Block* return_block{block_pool.Create(inst_pool)};
IR::IREmitter{*return_block}.Epilogue();
current_block->AddBranch(return_block);
auto& merge{syntax_list.emplace_back()};
merge.type = IR::AbstractSyntaxNode::Type::Block;
merge.data.block = return_block;
current_block = nullptr;
syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
break;
@@ -824,6 +835,7 @@ private:
auto& merge{syntax_list.emplace_back()};
merge.type = IR::AbstractSyntaxNode::Type::Block;
merge.data.block = demote_block;
uses_demote_to_helper = true;
break;
}
case StatementType::Unreachable: {
@@ -855,11 +867,117 @@ private:
return block_pool.Create(inst_pool);
}
void DemoteCombinationPass() {
using Type = IR::AbstractSyntaxNode::Type;
std::vector<IR::Block*> demote_blocks;
std::vector<IR::U1> demote_conds;
u32 num_epilogues{};
u32 branch_depth{};
for (const IR::AbstractSyntaxNode& node : syntax_list) {
if (node.type == Type::If) {
++branch_depth;
}
if (node.type == Type::EndIf) {
--branch_depth;
}
if (node.type != Type::Block) {
continue;
}
if (branch_depth > 1) {
// Skip reordering nested demote branches.
continue;
}
for (const IR::Inst& inst : node.data.block->Instructions()) {
const IR::Opcode op{inst.GetOpcode()};
if (op == IR::Opcode::DemoteToHelperInvocation) {
demote_blocks.push_back(node.data.block);
break;
}
if (op == IR::Opcode::Epilogue) {
++num_epilogues;
}
}
}
if (demote_blocks.size() == 0) {
return;
}
if (num_epilogues > 1) {
LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented.");
return;
}
s64 last_iterator_offset{};
auto& asl{syntax_list};
for (const IR::Block* demote_block : demote_blocks) {
const auto start_it{asl.begin() + last_iterator_offset};
auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
return asn.type == Type::If && asn.data.if_node.body == demote_block;
})};
if (asl_it == asl.end()) {
// Demote without a conditional branch.
// No need to proceed since all fragment instances will be demoted regardless.
return;
}
const IR::Block* const end_if = asl_it->data.if_node.merge;
demote_conds.push_back(asl_it->data.if_node.cond);
last_iterator_offset = std::distance(asl.begin(), asl_it);
asl_it = asl.erase(asl_it);
asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
return asn.type == Type::Block && asn.data.block == demote_block;
});
asl_it = asl.erase(asl_it);
asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
return asn.type == Type::EndIf && asn.data.end_if.merge == end_if;
});
asl_it = asl.erase(asl_it);
}
const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) {
if (asn.type != Type::Block) {
return false;
}
for (const auto& inst : asn.data.block->Instructions()) {
if (inst.GetOpcode() == IR::Opcode::Epilogue) {
return true;
}
}
return false;
}};
const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)};
const auto return_block_it{(reverse_it + 1).base()};
IR::IREmitter ir{*(return_block_it - 1)->data.block};
IR::U1 cond(IR::Value(false));
for (const auto& demote_cond : demote_conds) {
cond = ir.LogicalOr(cond, demote_cond);
}
cond.Inst()->DestructiveAddUsage(1);
IR::AbstractSyntaxNode demote_if_node{};
demote_if_node.type = Type::If;
demote_if_node.data.if_node.cond = cond;
demote_if_node.data.if_node.body = demote_blocks[0];
demote_if_node.data.if_node.merge = return_block_it->data.block;
IR::AbstractSyntaxNode demote_node{};
demote_node.type = Type::Block;
demote_node.data.block = demote_blocks[0];
IR::AbstractSyntaxNode demote_endif_node{};
demote_endif_node.type = Type::EndIf;
demote_endif_node.data.end_if.merge = return_block_it->data.block;
asl.insert(return_block_it, demote_endif_node);
asl.insert(return_block_it, demote_node);
asl.insert(return_block_it, demote_if_node);
}
ObjectPool<Statement>& stmt_pool;
ObjectPool<IR::Inst>& inst_pool;
ObjectPool<IR::Block>& block_pool;
Environment& env;
IR::AbstractSyntaxList& syntax_list;
bool uses_demote_to_helper{};
// TODO: C++20 Remove this when all compilers support constexpr std::vector
#if __cpp_lib_constexpr_vector >= 201907
@@ -871,12 +989,13 @@ private:
} // Anonymous namespace
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
Environment& env, Flow::CFG& cfg) {
Environment& env, Flow::CFG& cfg,
const HostTranslateInfo& host_info) {
ObjectPool<Statement> stmt_pool{64};
GotoPass goto_pass{cfg, stmt_pool};
Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list;
TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list};
TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info};
return syntax_list;
}

View File

@@ -11,10 +11,13 @@
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::Maxwell {
namespace Shader {
struct HostTranslateInfo;
namespace Maxwell {
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Environment& env,
Flow::CFG& cfg);
Flow::CFG& cfg, const HostTranslateInfo& host_info);
} // namespace Shader::Maxwell
} // namespace Maxwell
} // namespace Shader

View File

@@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) {
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
IR::Program program;
program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = PostOrder(program.syntax_list.front());
program.stage = env.ShaderStage();

View File

@@ -11,8 +11,9 @@ namespace Shader {
/// Misc information about the host
struct HostTranslateInfo {
bool support_float16{}; ///< True when the device supports 16-bit floats
bool support_int64{}; ///< True when the device supports 64-bit integers
bool support_float16{}; ///< True when the device supports 16-bit floats
bool support_int64{}; ///< True when the device supports 64-bit integers
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
};
} // namespace Shader

View File

@@ -4,11 +4,13 @@
#include <catch2/catch.hpp>
#include <math.h>
#include "common/logging/backend.h"
#include "common/param_package.h"
namespace Common {
TEST_CASE("ParamPackage", "[common]") {
Common::Log::DisableLoggingInTests();
ParamPackage original{
{"abc", "xyz"},
{"def", "42"},

View File

@@ -231,6 +231,7 @@ endif()
target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})
add_dependencies(video_core host_shaders)
target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})

View File

@@ -261,16 +261,6 @@ public:
stream_score += score;
}
/// Sets the new frame tick
void SetFrameTick(u64 new_frame_tick) noexcept {
frame_tick = new_frame_tick;
}
/// Returns the new frame tick
[[nodiscard]] u64 FrameTick() const noexcept {
return frame_tick;
}
/// Returns the likeliness of this being a stream buffer
[[nodiscard]] int StreamScore() const noexcept {
return stream_score;
@@ -307,6 +297,14 @@ public:
return words.size_bytes;
}
size_t getLRUID() const noexcept {
return lru_id;
}
void setLRUID(size_t lru_id_) {
lru_id = lru_id_;
}
private:
template <Type type>
u64* Array() noexcept {
@@ -603,9 +601,9 @@ private:
RasterizerInterface* rasterizer = nullptr;
VAddr cpu_addr = 0;
Words words;
u64 frame_tick = 0;
BufferFlagBits flags{};
int stream_score = 0;
size_t lru_id = SIZE_MAX;
};
} // namespace VideoCommon

View File

@@ -20,6 +20,7 @@
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/literals.h"
#include "common/lru_cache.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
@@ -330,7 +331,7 @@ private:
template <bool insert>
void ChangeRegister(BufferId buffer_id);
void TouchBuffer(Buffer& buffer) const noexcept;
void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
@@ -428,7 +429,11 @@ private:
size_t immediate_buffer_capacity = 0;
std::unique_ptr<u8[]> immediate_buffer_alloc;
typename SlotVector<Buffer>::Iterator deletion_iterator;
struct LRUItemParams {
using ObjectType = BufferId;
using TickType = u64;
};
Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
u64 frame_tick = 0;
u64 total_used_memory = 0;
@@ -445,7 +450,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
deletion_iterator = slot_buffers.end();
common_ranges.clear();
}
@@ -454,20 +458,17 @@ void BufferCache<P>::RunGarbageCollector() {
const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
int num_iterations = aggressive_gc ? 64 : 32;
for (; num_iterations > 0; --num_iterations) {
if (deletion_iterator == slot_buffers.end()) {
deletion_iterator = slot_buffers.begin();
const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
if (num_iterations == 0) {
return true;
}
++deletion_iterator;
if (deletion_iterator == slot_buffers.end()) {
break;
}
const auto [buffer_id, buffer] = *deletion_iterator;
if (buffer->FrameTick() + ticks_to_destroy < frame_tick) {
DownloadBufferMemory(*buffer);
DeleteBuffer(buffer_id);
}
}
--num_iterations;
auto& buffer = slot_buffers[buffer_id];
DownloadBufferMemory(buffer);
DeleteBuffer(buffer_id);
return false;
};
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
}
template <class P>
@@ -485,7 +486,7 @@ void BufferCache<P>::TickFrame() {
const bool skip_preferred = hits * 256 < shots * 251;
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) {
if (total_used_memory >= EXPECTED_MEMORY) {
RunGarbageCollector();
}
++frame_tick;
@@ -954,7 +955,7 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
template <class P>
void BufferCache<P>::BindHostIndexBuffer() {
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
TouchBuffer(buffer);
TouchBuffer(buffer, index_buffer.buffer_id);
const u32 offset = buffer.Offset(index_buffer.cpu_addr);
const u32 size = index_buffer.size;
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
@@ -975,7 +976,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
const Binding& binding = vertex_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer);
TouchBuffer(buffer, binding.buffer_id);
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
@@ -1011,7 +1012,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
const VAddr cpu_addr = binding.cpu_addr;
const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer);
TouchBuffer(buffer, binding.buffer_id);
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
size <= uniform_buffer_skip_cache_size &&
!buffer.IsRegionGpuModified(cpu_addr, size);
@@ -1083,7 +1084,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
const Binding& binding = storage_buffers[stage][index];
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer);
TouchBuffer(buffer, binding.buffer_id);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -1128,7 +1129,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
const Binding& binding = transform_feedback_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer);
TouchBuffer(buffer, binding.buffer_id);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -1148,7 +1149,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
const Binding& binding = compute_uniform_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer);
TouchBuffer(buffer, binding.buffer_id);
const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -1168,7 +1169,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
const Binding& binding = compute_storage_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer);
TouchBuffer(buffer, binding.buffer_id);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -1513,11 +1514,11 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
TouchBuffer(slot_buffers[new_buffer_id]);
for (const BufferId overlap_id : overlap.ids) {
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
}
Register(new_buffer_id);
TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id);
return new_buffer_id;
}
@@ -1534,12 +1535,14 @@ void BufferCache<P>::Unregister(BufferId buffer_id) {
template <class P>
template <bool insert>
void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
const Buffer& buffer = slot_buffers[buffer_id];
Buffer& buffer = slot_buffers[buffer_id];
const auto size = buffer.SizeBytes();
if (insert) {
total_used_memory += Common::AlignUp(size, 1024);
buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick));
} else {
total_used_memory -= Common::AlignUp(size, 1024);
lru_cache.Free(buffer.getLRUID());
}
const VAddr cpu_addr_begin = buffer.CpuAddr();
const VAddr cpu_addr_end = cpu_addr_begin + size;
@@ -1555,8 +1558,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
}
template <class P>
void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept {
buffer.SetFrameTick(frame_tick);
void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
if (buffer_id != NULL_BUFFER_ID) {
lru_cache.Touch(buffer.getLRUID(), frame_tick);
}
}
template <class P>

View File

@@ -5,6 +5,7 @@
#include <fstream>
#include <vector>
#include "common/assert.h"
#include "common/settings.h"
#include "video_core/command_classes/codecs/codec.h"
#include "video_core/command_classes/codecs/h264.h"
#include "video_core/command_classes/codecs/vp9.h"
@@ -16,44 +17,28 @@ extern "C" {
}
namespace Tegra {
#if defined(LIBVA_FOUND)
// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license
namespace {
constexpr std::array<const char*, 2> VAAPI_DRIVERS = {
"i915",
"amdgpu",
};
constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) {
void AVPacketDeleter(AVPacket* ptr) {
av_packet_free(&ptr);
}
using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>;
AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) {
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
if (*p == AV_PIX_FMT_VAAPI) {
return AV_PIX_FMT_VAAPI;
if (*p == av_codec_ctx->pix_fmt) {
return av_codec_ctx->pix_fmt;
}
}
LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
return *pix_fmts;
}
bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) {
AVDictionary* hwdevice_options = nullptr;
av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
for (const auto& driver : VAAPI_DRIVERS) {
av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI,
nullptr, hwdevice_options, 0);
if (hwdevice_error >= 0) {
LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
av_dict_free(&hwdevice_options);
return true;
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
av_dict_free(&hwdevice_options);
return false;
av_buffer_unref(&av_codec_ctx->hw_device_ctx);
av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
return PREFERRED_CPU_FMT;
}
} // namespace
#endif
void AVFrameDeleter(AVFrame* ptr) {
av_frame_free(&ptr);
@@ -68,56 +53,110 @@ Codec::~Codec() {
return;
}
// Free libav memory
avcodec_send_packet(av_codec_ctx, nullptr);
AVFrame* av_frame = av_frame_alloc();
avcodec_receive_frame(av_codec_ctx, av_frame);
avcodec_flush_buffers(av_codec_ctx);
av_frame_free(&av_frame);
avcodec_close(av_codec_ctx);
av_buffer_unref(&av_hw_device);
avcodec_free_context(&av_codec_ctx);
av_buffer_unref(&av_gpu_decoder);
}
void Codec::InitializeHwdec() {
// Prioritize integrated GPU to mitigate bandwidth bottlenecks
bool Codec::CreateGpuAvDevice() {
#if defined(LIBVA_FOUND)
if (CreateVaapiHwdevice(&av_hw_device)) {
const auto hw_device_ctx = av_buffer_ref(av_hw_device);
ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
av_codec_ctx->hw_device_ctx = hw_device_ctx;
av_codec_ctx->get_format = GetHwFormat;
static constexpr std::array<const char*, 3> VAAPI_DRIVERS = {
"i915",
"iHD",
"amdgpu",
};
AVDictionary* hwdevice_options = nullptr;
av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
for (const auto& driver : VAAPI_DRIVERS) {
av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI,
nullptr, hwdevice_options, 0);
if (hwdevice_error >= 0) {
LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
av_dict_free(&hwdevice_options);
av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI;
return true;
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
av_dict_free(&hwdevice_options);
#endif
static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
static constexpr std::array GPU_DECODER_TYPES{
AV_HWDEVICE_TYPE_CUDA,
#ifdef _WIN32
AV_HWDEVICE_TYPE_D3D11VA,
#else
AV_HWDEVICE_TYPE_VDPAU,
#endif
};
for (const auto& type : GPU_DECODER_TYPES) {
const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
if (hwdevice_res < 0) {
LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
av_hwdevice_get_type_name(type), hwdevice_res);
continue;
}
for (int i = 0;; i++) {
const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
if (!config) {
LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.",
av_codec->name, av_hwdevice_get_type_name(type));
break;
}
if (config->methods & HW_CONFIG_METHOD && config->device_type == type) {
av_codec_ctx->pix_fmt = config->pix_fmt;
LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
return true;
}
}
}
return false;
}
void Codec::InitializeAvCodecContext() {
av_codec_ctx = avcodec_alloc_context3(av_codec);
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
}
void Codec::InitializeGpuDecoder() {
if (!CreateGpuAvDevice()) {
av_buffer_unref(&av_gpu_decoder);
return;
}
#endif
// TODO more GPU accelerated decoders
auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder);
ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
av_codec_ctx->hw_device_ctx = hw_device_ctx;
av_codec_ctx->get_format = GetGpuFormat;
}
void Codec::Initialize() {
AVCodecID codec;
switch (current_codec) {
case NvdecCommon::VideoCodec::H264:
codec = AV_CODEC_ID_H264;
break;
case NvdecCommon::VideoCodec::Vp9:
codec = AV_CODEC_ID_VP9;
break;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
const AVCodecID codec = [&] {
switch (current_codec) {
case NvdecCommon::VideoCodec::H264:
return AV_CODEC_ID_H264;
case NvdecCommon::VideoCodec::Vp9:
return AV_CODEC_ID_VP9;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
return AV_CODEC_ID_NONE;
}
}();
av_codec = avcodec_find_decoder(codec);
InitializeAvCodecContext();
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) {
InitializeGpuDecoder();
}
if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) {
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res);
avcodec_free_context(&av_codec_ctx);
av_buffer_unref(&av_gpu_decoder);
return;
}
av_codec = avcodec_find_decoder(codec);
av_codec_ctx = avcodec_alloc_context3(av_codec);
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
InitializeHwdec();
if (!av_codec_ctx->hw_device_ctx) {
LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
}
const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
if (av_error < 0) {
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
avcodec_close(av_codec_ctx);
av_buffer_unref(&av_hw_device);
return;
}
initialized = true;
}
@@ -133,6 +172,9 @@ void Codec::Decode() {
if (is_first_frame) {
Initialize();
}
if (!initialized) {
return;
}
bool vp9_hidden_frame = false;
std::vector<u8> frame_data;
if (current_codec == NvdecCommon::VideoCodec::H264) {
@@ -141,50 +183,48 @@ void Codec::Decode() {
frame_data = vp9_decoder->ComposeFrameHeader(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
}
AVPacket packet{};
av_init_packet(&packet);
packet.data = frame_data.data();
packet.size = static_cast<s32>(frame_data.size());
if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret);
AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
if (!packet) {
LOG_ERROR(Service_NVDRV, "av_packet_alloc failed");
return;
}
packet->data = frame_data.data();
packet->size = static_cast<s32>(frame_data.size());
if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
return;
}
// Only receive/store visible frames
if (vp9_hidden_frame) {
return;
}
AVFrame* hw_frame = av_frame_alloc();
AVFrame* sw_frame = hw_frame;
ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed");
if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) {
AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter};
AVFramePtr final_frame{nullptr, AVFrameDeleter};
ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed");
if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) {
LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
av_frame_free(&hw_frame);
return;
}
if (!hw_frame->width || !hw_frame->height) {
if (initial_frame->width == 0 || initial_frame->height == 0) {
LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
av_frame_free(&hw_frame);
return;
}
#if defined(LIBVA_FOUND)
// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license
if (hw_frame->format == AV_PIX_FMT_VAAPI) {
sw_frame = av_frame_alloc();
ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed");
if (av_codec_ctx->hw_device_ctx) {
final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed");
// Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
// because Intel drivers crash unless using AV_PIX_FMT_NV12
sw_frame->format = AV_PIX_FMT_NV12;
const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0);
ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret);
av_frame_free(&hw_frame);
final_frame->format = PREFERRED_GPU_FMT;
const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0);
ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret);
} else {
final_frame = std::move(initial_frame);
}
#endif
if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) {
UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format);
av_frame_free(&sw_frame);
if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) {
UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
return;
}
av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter});
av_frames.push(std::move(final_frame));
if (av_frames.size() > 10) {
LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
av_frames.pop();

View File

@@ -5,6 +5,7 @@
#pragma once
#include <memory>
#include <string_view>
#include <queue>
#include "common/common_types.h"
#include "video_core/command_classes/nvdec_common.h"
@@ -50,18 +51,23 @@ public:
/// Returns the value of current_codec
[[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
/// Return name of the current codec
[[nodiscard]] std::string_view GetCurrentCodecName() const;
private:
void InitializeHwdec();
void InitializeAvCodecContext();
void InitializeGpuDecoder();
bool CreateGpuAvDevice();
bool initialized{};
NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
AVCodec* av_codec{nullptr};
AVBufferRef* av_hw_device{nullptr};
AVCodecContext* av_codec_ctx{nullptr};
AVBufferRef* av_gpu_decoder{nullptr};
GPU& gpu;
const NvdecCommon::NvdecRegisters& state;

View File

@@ -95,7 +95,8 @@ const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegister
const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
(context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
writer.WriteUe(16);
// TODO (ameerj): Where do we get this number, it seems to be particular for each stream
writer.WriteUe(6); // Max number of reference frames
writer.WriteBit(false);
writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
writer.WriteUe(pic_height - 1);

View File

@@ -742,6 +742,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);
ASSERT(!current_frame_info.segment_enabled);
uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).
const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);

View File

@@ -22,7 +22,7 @@ struct Vp9FrameDimensions {
};
static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
enum FrameFlags : u32 {
enum class FrameFlags : u32 {
IsKeyFrame = 1 << 0,
LastFrameIsKeyFrame = 1 << 1,
FrameSizeChanged = 1 << 2,
@@ -30,6 +30,7 @@ enum FrameFlags : u32 {
LastShowFrame = 1 << 4,
IntraOnly = 1 << 5,
};
DECLARE_ENUM_FLAG_OPERATORS(FrameFlags)
enum class TxSize {
Tx4x4 = 0, // 4x4 transform
@@ -92,44 +93,34 @@ struct Vp9EntropyProbs {
static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
struct Vp9PictureInfo {
bool is_key_frame;
bool intra_only;
bool last_frame_was_key;
bool frame_size_changed;
bool error_resilient_mode;
bool last_frame_shown;
bool show_frame;
u32 bitstream_size;
std::array<u64, 4> frame_offsets;
std::array<s8, 4> ref_frame_sign_bias;
s32 base_q_index;
s32 y_dc_delta_q;
s32 uv_dc_delta_q;
s32 uv_ac_delta_q;
bool lossless;
s32 transform_mode;
bool allow_high_precision_mv;
s32 interp_filter;
s32 reference_mode;
s8 comp_fixed_ref;
std::array<s8, 2> comp_var_ref;
s32 log2_tile_cols;
s32 log2_tile_rows;
bool segment_enabled;
bool segment_map_update;
bool segment_map_temporal_update;
s32 segment_abs_delta;
std::array<u32, 8> segment_feature_enable;
std::array<std::array<s16, 4>, 8> segment_feature_data;
bool mode_ref_delta_enabled;
bool use_prev_in_find_mv_refs;
std::array<s8, 4> ref_deltas;
std::array<s8, 2> mode_deltas;
Vp9EntropyProbs entropy;
Vp9FrameDimensions frame_size;
u8 first_level;
u8 sharpness_level;
u32 bitstream_size;
std::array<u64, 4> frame_offsets;
std::array<bool, 4> refresh_frame;
bool is_key_frame;
bool intra_only;
bool last_frame_was_key;
bool error_resilient_mode;
bool last_frame_shown;
bool show_frame;
bool lossless;
bool allow_high_precision_mv;
bool segment_enabled;
bool mode_ref_delta_enabled;
};
struct Vp9FrameContainer {
@@ -145,7 +136,7 @@ struct PictureInfo {
Vp9FrameDimensions golden_frame_size; ///< 0x50
Vp9FrameDimensions alt_frame_size; ///< 0x58
Vp9FrameDimensions current_frame_size; ///< 0x60
u32 vp9_flags; ///< 0x68
FrameFlags vp9_flags; ///< 0x68
std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
u8 first_level; ///< 0x70
u8 sharpness_level; ///< 0x71
@@ -158,60 +149,43 @@ struct PictureInfo {
u8 allow_high_precision_mv; ///< 0x78
u8 interp_filter; ///< 0x79
u8 reference_mode; ///< 0x7A
s8 comp_fixed_ref; ///< 0x7B
std::array<s8, 2> comp_var_ref; ///< 0x7C
INSERT_PADDING_BYTES_NOINIT(3); ///< 0x7B
u8 log2_tile_cols; ///< 0x7E
u8 log2_tile_rows; ///< 0x7F
Segmentation segmentation; ///< 0x80
LoopFilter loop_filter; ///< 0xE4
INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB
u32 surface_params; ///< 0xF0
INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4
INSERT_PADDING_BYTES_NOINIT(21); ///< 0xEB
[[nodiscard]] Vp9PictureInfo Convert() const {
return {
.is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
.intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
.last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
.frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
.error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
.last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
.show_frame = true,
.bitstream_size = bitstream_size,
.frame_offsets{},
.ref_frame_sign_bias = ref_frame_sign_bias,
.base_q_index = base_q_index,
.y_dc_delta_q = y_dc_delta_q,
.uv_dc_delta_q = uv_dc_delta_q,
.uv_ac_delta_q = uv_ac_delta_q,
.lossless = lossless != 0,
.transform_mode = tx_mode,
.allow_high_precision_mv = allow_high_precision_mv != 0,
.interp_filter = interp_filter,
.reference_mode = reference_mode,
.comp_fixed_ref = comp_fixed_ref,
.comp_var_ref = comp_var_ref,
.log2_tile_cols = log2_tile_cols,
.log2_tile_rows = log2_tile_rows,
.segment_enabled = segmentation.enabled != 0,
.segment_map_update = segmentation.update_map != 0,
.segment_map_temporal_update = segmentation.temporal_update != 0,
.segment_abs_delta = segmentation.abs_delta,
.segment_feature_enable = segmentation.feature_mask,
.segment_feature_data = segmentation.feature_data,
.mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
.use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) &&
!(vp9_flags == (FrameFlags::FrameSizeChanged)) &&
!(vp9_flags == (FrameFlags::IntraOnly)) &&
(vp9_flags == (FrameFlags::LastShowFrame)) &&
!(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
.ref_deltas = loop_filter.ref_deltas,
.mode_deltas = loop_filter.mode_deltas,
.entropy{},
.frame_size = current_frame_size,
.first_level = first_level,
.sharpness_level = sharpness_level,
.bitstream_size = bitstream_size,
.frame_offsets{},
.refresh_frame{},
.is_key_frame = True(vp9_flags & FrameFlags::IsKeyFrame),
.intra_only = True(vp9_flags & FrameFlags::IntraOnly),
.last_frame_was_key = True(vp9_flags & FrameFlags::LastFrameIsKeyFrame),
.error_resilient_mode = True(vp9_flags & FrameFlags::ErrorResilientMode),
.last_frame_shown = True(vp9_flags & FrameFlags::LastShowFrame),
.show_frame = true,
.lossless = lossless != 0,
.allow_high_precision_mv = allow_high_precision_mv != 0,
.segment_enabled = segmentation.enabled != 0,
.mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
};
}
};
@@ -316,7 +290,6 @@ ASSERT_POSITION(last_frame_size, 0x48);
ASSERT_POSITION(first_level, 0x70);
ASSERT_POSITION(segmentation, 0x80);
ASSERT_POSITION(loop_filter, 0xE4);
ASSERT_POSITION(surface_params, 0xF0);
#undef ASSERT_POSITION
#define ASSERT_POSITION(field_name, position) \

View File

@@ -96,12 +96,11 @@ void Vic::Execute() {
if (!converted_frame_buffer) {
converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
}
const int converted_stride{frame->width * 4};
const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height,
&converted_frame_buf_addr, &converted_stride);
&converted_frame_buf_addr, converted_stride.data());
const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
if (blk_kind != 0) {

View File

@@ -475,10 +475,10 @@ public:
// These values are used by Nouveau and some games.
AddGL = 0x8006,
SubtractGL = 0x8007,
ReverseSubtractGL = 0x8008,
MinGL = 0x800a,
MaxGL = 0x800b
MinGL = 0x8007,
MaxGL = 0x8008,
SubtractGL = 0x800a,
ReverseSubtractGL = 0x800b
};
enum class Factor : u32 {

View File

@@ -6,7 +6,7 @@
#include <array>
#include <bitset>
#include <xbyak.h>
#include <xbyak/xbyak.h>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/x64/xbyak_abi.h"

View File

@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
@@ -463,6 +465,7 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
++page_index;
page_offset = 0;
remaining_size -= num_bytes;
old_page_addr = page_addr;
}
split();
return result;

View File

@@ -156,6 +156,10 @@ public:
return shader_backend;
}
bool IsAmd() const {
return vendor_name == "ATI Technologies Inc.";
}
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();

View File

@@ -566,10 +566,8 @@ void RasterizerOpenGL::SyncViewport() {
if (regs.screen_y_control.y_negate != 0) {
flip_y = !flip_y;
}
const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
state_tracker.ClipControl(origin, depth);
state_tracker.SetOrigin(origin);
state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
}
@@ -593,6 +591,15 @@ void RasterizerOpenGL::SyncViewport() {
const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
const GLdouble far_depth = src.translate_z + src.scale_z;
if (i == 0) {
const auto& viewports = regs.viewports[i];
const GLenum depth = viewports.depth_range_near != src.translate_z &&
viewports.depth_range_far != src.translate_z
? GL_NEGATIVE_ONE_TO_ONE
: GL_ZERO_TO_ONE;
state_tracker.SetDepthMode(depth);
}
if (device.HasDepthBufferFloat()) {
glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth);
} else {

View File

@@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
host_info{
.support_float16 = false,
.support_int64 = device.HasShaderInt64(),
.needs_demote_reorder = device.IsAmd(),
} {
if (use_asynchronous_shaders) {
workers = CreateWorkers();

View File

@@ -112,6 +112,22 @@ public:
glClipControl(origin, depth);
}
void SetOrigin(GLenum new_origin) {
if (new_origin == origin) {
return;
}
origin = new_origin;
glClipControl(origin, depth);
}
void SetDepthMode(GLenum new_depth) {
if (new_depth == depth) {
return;
}
depth = new_depth;
glClipControl(origin, depth);
}
void SetYNegate(bool new_y_negate) {
if (new_y_negate == y_negate) {
return;

View File

@@ -164,7 +164,8 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
blit_screen.Recreate();
}
const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
scheduler.Flush(render_semaphore);
const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
scheduler.Flush(render_semaphore, present_semaphore);
scheduler.WaitWorker();
swapchain.Present(render_semaphore);

View File

@@ -159,11 +159,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
const size_t size_bytes = GetSizeInBytes(framebuffer);
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
const u64 size_bytes{Tegra::Texture::CalculateSize(true, bytes_per_pixel,
framebuffer.stride, framebuffer.height,
1, block_height_log2, 0)};
Tegra::Texture::UnswizzleTexture(
mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
@@ -356,7 +358,7 @@ void VKBlitScreen::CreateDescriptorPool() {
void VKBlitScreen::CreateRenderPass() {
const VkAttachmentDescription color_attachment{
.flags = 0,
.format = swapchain.GetImageFormat(),
.format = swapchain.GetImageViewFormat(),
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <mutex>
#include <span>
#include <vector>
@@ -18,7 +19,6 @@ namespace Vulkan {
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
constexpr size_t SETS_GROW_RATE = 16;
constexpr s32 SCORE_THRESHOLD = 3;
constexpr u32 SETS_PER_POOL = 64;
struct DescriptorBank {
DescriptorBankInfo info;
@@ -58,11 +58,12 @@ static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
static void AllocatePool(const Device& device, DescriptorBank& bank) {
std::array<VkDescriptorPoolSize, 6> pool_sizes;
size_t pool_cursor{};
const u32 sets_per_pool = device.GetSetsPerPool();
const auto add = [&](VkDescriptorType type, u32 count) {
if (count > 0) {
pool_sizes[pool_cursor++] = {
.type = type,
.descriptorCount = count * SETS_PER_POOL,
.descriptorCount = count * sets_per_pool,
};
}
};
@@ -77,7 +78,7 @@ static void AllocatePool(const Device& device, DescriptorBank& bank) {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
.maxSets = SETS_PER_POOL,
.maxSets = sets_per_pool,
.poolSizeCount = static_cast<u32>(pool_cursor),
.pPoolSizes = std::data(pool_sizes),
}));

View File

@@ -281,7 +281,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
.supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,
.unified_descriptor_binding = true,
.support_descriptor_aliasing = true,
.support_int8 = true,
.support_int8 = device.IsInt8Supported(),
.support_int16 = device.IsShaderInt16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
.support_vertex_instance_id = false,
@@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
host_info = Shader::HostTranslateInfo{
.support_float16 = device.IsFloat16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
.needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,
};
}

View File

@@ -228,9 +228,7 @@ void RasterizerVulkan::Clear() {
};
const u32 color_attachment = regs.clear_buffers.RT;
const auto attachment_aspect_mask = framebuffer->ImageRanges()[color_attachment].aspectMask;
const bool is_color_rt = (attachment_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
if (use_color && is_color_rt) {
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
VkClearValue clear_value;
std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
@@ -248,12 +246,15 @@ void RasterizerVulkan::Clear() {
return;
}
VkImageAspectFlags aspect_flags = 0;
if (use_depth) {
if (use_depth && framebuffer->HasAspectDepthBit()) {
aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT;
}
if (use_stencil) {
if (use_stencil && framebuffer->HasAspectStencilBit()) {
aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
if (aspect_flags == 0) {
return;
}
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
VkClearAttachment attachment;
@@ -764,12 +765,7 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
const Maxwell::StencilOp zpass = regs.stencil_front_op_zpass;
const Maxwell::ComparisonOp compare = regs.stencil_front_func_func;
if (regs.stencil_two_side_enable) {
scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
MaxwellToVK::ComparisonOp(compare));
});
} else {
// Separate stencil op per face
const Maxwell::StencilOp back_fail = regs.stencil_back_op_fail;
const Maxwell::StencilOp back_zfail = regs.stencil_back_op_zfail;
const Maxwell::StencilOp back_zpass = regs.stencil_back_op_zpass;
@@ -784,6 +780,13 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
MaxwellToVK::StencilOp(back_zfail),
MaxwellToVK::ComparisonOp(back_compare));
});
} else {
// Front face defines the stencil op of both faces
scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
MaxwellToVK::ComparisonOp(compare));
});
}
}

View File

@@ -55,14 +55,14 @@ VKScheduler::~VKScheduler() {
worker_thread.join();
}
void VKScheduler::Flush(VkSemaphore semaphore) {
SubmitExecution(semaphore);
void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
SubmitExecution(signal_semaphore, wait_semaphore);
AllocateNewContext();
}
void VKScheduler::Finish(VkSemaphore semaphore) {
void VKScheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
const u64 presubmit_tick = CurrentTick();
SubmitExecution(semaphore);
SubmitExecution(signal_semaphore, wait_semaphore);
WaitWorker();
Wait(presubmit_tick);
AllocateNewContext();
@@ -171,37 +171,41 @@ void VKScheduler::AllocateWorkerCommandBuffer() {
});
}
void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
void VKScheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
EndPendingOperations();
InvalidateState();
const u64 signal_value = master_semaphore->NextTick();
Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
cmdbuf.End();
const u32 num_signal_semaphores = semaphore ? 2U : 1U;
const u64 wait_value = signal_value - 1;
const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
const VkSemaphore timeline_semaphore = master_semaphore->Handle();
const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
const std::array signal_values{signal_value, u64(0)};
const std::array signal_semaphores{timeline_semaphore, semaphore};
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
const std::array wait_values{signal_value - 1, u64(1)};
const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
};
const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
.pNext = nullptr,
.waitSemaphoreValueCount = 1,
.pWaitSemaphoreValues = &wait_value,
.waitSemaphoreValueCount = num_wait_semaphores,
.pWaitSemaphoreValues = wait_values.data(),
.signalSemaphoreValueCount = num_signal_semaphores,
.pSignalSemaphoreValues = signal_values.data(),
};
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &timeline_si,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &timeline_semaphore,
.pWaitDstStageMask = &wait_stage_mask,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),
.signalSemaphoreCount = num_signal_semaphores,

View File

@@ -34,10 +34,10 @@ public:
~VKScheduler();
/// Sends the current execution context to the GPU.
void Flush(VkSemaphore semaphore = nullptr);
void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
void Finish(VkSemaphore semaphore = nullptr);
void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
@@ -191,7 +191,7 @@ private:
void AllocateWorkerCommandBuffer();
void SubmitExecution(VkSemaphore semaphore);
void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
void AllocateNewContext();

View File

@@ -110,10 +110,6 @@ public:
return Exchange(Dirty::DepthTestEnable, false);
}
bool TouchDepthBoundsEnable() {
return Exchange(Dirty::DepthBoundsEnable, false);
}
bool TouchDepthWriteEnable() {
return Exchange(Dirty::DepthWriteEnable, false);
}

View File

@@ -20,16 +20,15 @@ namespace Vulkan {
namespace {
VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) {
VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) {
if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) {
VkSurfaceFormatKHR format;
format.format = VK_FORMAT_B8G8R8A8_UNORM;
format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
return format;
}
const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) {
const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
return format.format == request_format &&
const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
return format.format == VK_FORMAT_B8G8R8A8_UNORM &&
format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
});
return found != formats.end() ? *found : formats[0];
@@ -107,14 +106,12 @@ void VKSwapchain::AcquireNextImage() {
}
void VKSwapchain::Present(VkSemaphore render_semaphore) {
const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
const auto present_queue{device.GetPresentQueue()};
const VkPresentInfoKHR present_info{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pNext = nullptr,
.waitSemaphoreCount = render_semaphore ? 2U : 1U,
.pWaitSemaphores = semaphores.data(),
.waitSemaphoreCount = render_semaphore ? 1U : 0U,
.pWaitSemaphores = &render_semaphore,
.swapchainCount = 1,
.pSwapchains = swapchain.address(),
.pImageIndices = &image_index,
@@ -145,7 +142,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
u32 requested_image_count{capabilities.minImageCount + 1};
@@ -180,6 +177,17 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
}
static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB};
VkImageFormatListCreateInfo format_list{
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
.pNext = nullptr,
.viewFormatCount = static_cast<u32>(view_formats.size()),
.pViewFormats = view_formats.data(),
};
if (device.IsKhrSwapchainMutableFormatEnabled()) {
format_list.pNext = std::exchange(swapchain_ci.pNext, &format_list);
swapchain_ci.flags |= VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR;
}
// Request the size again to reduce the possibility of a TOCTOU race condition.
const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
@@ -191,7 +199,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
images = swapchain.GetImages();
image_count = static_cast<u32>(images.size());
image_format = surface_format.format;
image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
}
void VKSwapchain::CreateSemaphores() {
@@ -207,7 +215,7 @@ void VKSwapchain::CreateImageViews() {
.flags = 0,
.image = {},
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = image_format,
.format = image_view_format,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,

View File

@@ -68,8 +68,12 @@ public:
return *image_views[index];
}
VkFormat GetImageFormat() const {
return image_format;
VkFormat GetImageViewFormat() const {
return image_view_format;
}
VkSemaphore CurrentPresentSemaphore() const {
return *present_semaphores[frame_index];
}
private:
@@ -96,7 +100,7 @@ private:
u32 image_index{};
u32 frame_index{};
VkFormat image_format{};
VkFormat image_view_format{};
VkExtent2D extent{};
bool current_srgb{};

View File

@@ -1186,9 +1186,12 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
renderpass_key.depth_format = depth_buffer->format;
num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
images[num_images] = depth_buffer->ImageHandle();
image_ranges[num_images] = MakeSubresourceRange(depth_buffer);
const VkImageSubresourceRange subresource_range = MakeSubresourceRange(depth_buffer);
image_ranges[num_images] = subresource_range;
samples = depth_buffer->Samples();
++num_images;
has_depth = (subresource_range.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0;
has_stencil = (subresource_range.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0;
} else {
renderpass_key.depth_format = PixelFormat::Invalid;
}

View File

@@ -232,6 +232,18 @@ public:
return image_ranges;
}
[[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
}
[[nodiscard]] bool HasAspectDepthBit() const noexcept {
return has_depth;
}
[[nodiscard]] bool HasAspectStencilBit() const noexcept {
return has_stencil;
}
private:
vk::Framebuffer framebuffer;
VkRenderPass renderpass{};
@@ -241,6 +253,8 @@ private:
u32 num_images = 0;
std::array<VkImage, 9> images{};
std::array<VkImageSubresourceRange, 9> image_ranges{};
bool has_depth{};
bool has_stencil{};
};
struct TextureCacheParams {

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <memory>

View File

@@ -80,7 +80,7 @@ struct ImageBase {
VAddr cpu_addr_end = 0;
u64 modification_tick = 0;
u64 frame_tick = 0;
size_t lru_index = SIZE_MAX;
std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};

View File

@@ -4,6 +4,7 @@
#pragma once
#include <algorithm>
#include <array>
#include <bit>
#include <concepts>

View File

@@ -5,7 +5,6 @@
#pragma once
#include "common/alignment.h"
#include "common/settings.h"
#include "video_core/dirty_flags.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/texture_cache_base.h"
@@ -43,8 +42,6 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
void(slot_image_views.insert(runtime, NullImageParams{}));
void(slot_samplers.insert(runtime, sampler_descriptor));
deletion_iterator = slot_images.begin();
if constexpr (HAS_DEVICE_MEMORY_INFO) {
const auto device_memory = runtime.GetDeviceLocalMemory();
const u64 possible_expected_memory = (device_memory * 3) / 10;
@@ -64,70 +61,38 @@ template <class P>
void TextureCache<P>::RunGarbageCollector() {
const bool high_priority_mode = total_used_memory >= expected_memory;
const bool aggressive_mode = total_used_memory >= critical_memory;
const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
for (; num_iterations > 0; --num_iterations) {
if (deletion_iterator == slot_images.end()) {
deletion_iterator = slot_images.begin();
if (deletion_iterator == slot_images.end()) {
break;
}
const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL;
size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5);
const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) {
if (num_iterations == 0) {
return true;
}
auto [image_id, image_tmp] = *deletion_iterator;
Image* image = image_tmp; // fix clang error.
const bool is_alias = True(image->flags & ImageFlagBits::Alias);
const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
const bool must_download = image->IsSafeDownload();
bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
const u64 ticks_needed =
is_bad_overlap
? ticks_to_destroy >> 4
: ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
should_care |= aggressive_mode;
if (should_care && image->frame_tick + ticks_needed < frame_tick) {
if (is_bad_overlap) {
const bool overlap_check = std::ranges::all_of(
image->overlapping_images, [&, image](const ImageId& overlap_id) {
auto& overlap = slot_images[overlap_id];
return overlap.frame_tick >= image->frame_tick;
});
if (!overlap_check) {
++deletion_iterator;
continue;
}
}
if (!is_bad_overlap && must_download) {
const bool alias_check = std::ranges::none_of(
image->aliased_images, [&, image](const AliasedImage& alias) {
auto& alias_image = slot_images[alias.id];
return (alias_image.frame_tick < image->frame_tick) ||
(alias_image.modification_tick < image->modification_tick);
});
if (alias_check) {
auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
const auto copies = FullDownloadCopies(image->info);
image->DownloadMemory(map, copies);
runtime.Finish();
SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
}
}
if (True(image->flags & ImageFlagBits::Tracked)) {
UntrackImage(*image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id);
if (is_bad_overlap) {
++num_iterations;
}
--num_iterations;
auto& image = slot_images[image_id];
const bool must_download = image.IsSafeDownload();
if (!high_priority_mode && must_download) {
return false;
}
++deletion_iterator;
}
if (must_download) {
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
}
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id);
return false;
};
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
}
template <class P>
void TextureCache<P>::TickFrame() {
if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) {
if (total_used_memory > minimum_memory) {
RunGarbageCollector();
}
sentenced_images.Tick();
@@ -1078,6 +1043,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory += Common::AlignUp(tentative_size, 1024);
image.lru_index = lru_cache.Insert(image_id, frame_tick);
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
[this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
if (False(image.flags & ImageFlagBits::Sparse)) {
@@ -1115,6 +1082,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory -= Common::AlignUp(tentative_size, 1024);
lru_cache.Free(image.lru_index);
const auto& clear_page_table =
[this, image_id](
u64 page,
@@ -1384,7 +1352,7 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
if (is_modification) {
MarkModification(image);
}
image.frame_tick = frame_tick;
lru_cache.Touch(image.lru_index, frame_tick);
}
template <class P>

View File

@@ -14,6 +14,7 @@
#include "common/common_types.h"
#include "common/literals.h"
#include "common/lru_cache.h"
#include "video_core/compatible_formats.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/engines/fermi_2d.h"
@@ -370,6 +371,12 @@ private:
std::vector<ImageId> uncommitted_downloads;
std::queue<std::vector<ImageId>> committed_downloads;
struct LRUItemParams {
using ObjectType = ImageId;
using TickType = u64;
};
Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
static constexpr size_t TICKS_TO_DESTROY = 6;
DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
@@ -379,7 +386,6 @@ private:
u64 modification_tick = 0;
u64 frame_tick = 0;
typename SlotVector<Image>::Iterator deletion_iterator;
};
} // namespace VideoCommon

Some files were not shown because too many files have changed in this diff Show More